def toleranceLimitProcessing(self, data):
    # Tolerance limit processing. In the EIA, tolerance limits are first
    # applied to the upper and lower bounds, then afterward to the
    # interval lengths (as opposed to all at once).
    random.seed(1)
    resampledData = [random.choice(data) for x in xrange(2000)]
    (resampLower, resampUpper) = zip(*resampledData)
    meanLower = nanmean(resampLower)
    stdLower = nanstd(resampLower) * sqrt(len(data))  # *sqrt is to get population std from sample
    meanUpper = nanmean(resampUpper)
    stdUpper = nanstd(resampUpper) * sqrt(len(data))  # ditto
    K = [32.019, 32.019, 8.380, 5.369, 4.275, 3.712, 3.369, 3.136, 2.967,
         2.839, 2.737, 2.655, 2.587, 2.529, 2.48, 2.437, 2.4, 2.366, 2.337,
         2.31, 2.31, 2.31, 2.31, 2.31, 2.208]  # taken from Liu/Mendel matlab code, in turn from Walpole, Myers, Myers, Ye 2008
    k = K[min(len(data), 24)]
    acceptableLower = (meanLower - k * stdLower, meanLower + k * stdLower)
    acceptableUpper = (meanUpper - k * stdUpper, meanUpper + k * stdUpper)
    for (l, u) in data[:]:
        try:
            if not acceptableLower[0] <= l <= acceptableLower[1]:
                raise ValueError("Intolerable: lower bound %s not in %s"
                                 % (str(l), str(acceptableLower)), (l, u))
            if not acceptableUpper[0] <= u <= acceptableUpper[1]:
                raise ValueError("Intolerable: upper bound %s not in %s"
                                 % (str(u), str(acceptableUpper)), (l, u))
        except ValueError as (e, d):
            #print e
            #print "Intolerable: removing data point %s" % str(d)
            data.remove(d)
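# Note: the snippets in this file lean on the old scipy.stats nanmean/nanstd
# helpers, which were deprecated and later removed from SciPy. A minimal,
# hypothetical compatibility shim using the NumPy equivalents (assuming
# NumPy >= 1.8, where np.nanmean/np.nanstd exist) might look like this;
# ddof=1 mirrors scipy's bias=False (sample std) convention:
import numpy as np

def nanmean(x, axis=0):
    # NaN-ignoring mean, matching the old scipy.stats.nanmean default axis
    return np.nanmean(x, axis=axis)

def nanstd(x, axis=0, bias=False):
    # NaN-ignoring std; scipy's bias=False corresponds to ddof=1
    return np.nanstd(x, axis=axis, ddof=0 if bias else 1)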
def timeseries(iData, zoneMap, std=None):
    ''' Make zone-wise averaging of input data
    input: 3D matrix (Layers x Width x Height) and map of zones (W x H)
    output: 2D matrices (L x WH) with mean and std
    '''
    #reshape input cube into 2D matrix
    r, h, w = iData.shape
    iData, notNanDataI = cube2flat(iData)
    #get unique values of not-nan labels
    uniqZones = np.unique(zoneMap[np.isfinite(zoneMap)])
    zoneNum = np.zeros((r, uniqZones.size))
    zoneMean = np.zeros((r, uniqZones.size))
    zoneStd = np.zeros((r, uniqZones.size))
    #in each zone: collect the input values and average the non-NaN entries
    for i in range(uniqZones.size):
        zi = uniqZones[i]
        if not np.isnan(zi):
            zoneData = iData[:, zoneMap.flat == zi]
            zoneNum[:, i] = zi
            zoneMean[:, i] = st.nanmean(zoneData, axis=1)
            zoneStd[:, i] = st.nanstd(zoneData, axis=1)
            if std is not None:
                # blank values more than std standard deviations from the
                # zone mean, then recompute the statistics
                outliers = (np.abs(zoneData.T - zoneMean[:, i]) >
                            zoneStd[:, i] * std).T
                zoneData[outliers] = np.nan
                zoneMean[:, i] = st.nanmean(zoneData, axis=1)
                zoneStd[:, i] = st.nanstd(zoneData, axis=1)
    return zoneMean, zoneStd, zoneNum
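# A small usage sketch for the zonal averaging above. cube2flat is not shown
# in this section, so a hypothetical stand-in is sketched here, assuming it
# reshapes (L, H, W) to (L, H*W) and returns a finite-data mask, which is how
# it is used above. Running this also assumes the old scipy.stats nan helpers
# (or shims like the one earlier) are bound to st:
import numpy as np

def cube2flat(iData):
    # hypothetical stand-in for the real helper
    r, h, w = iData.shape
    flat = iData.reshape(r, h * w)
    return flat, np.isfinite(flat)

cube = np.random.rand(3, 4, 4)               # 3 layers of a 4x4 grid
cube[0, 1, 1] = np.nan                       # one missing pixel
zones = np.zeros((4, 4))
zones[:, 2:] = 1                             # two zones: left and right halves
zMean, zStd, zNum = timeseries(cube, zones, std=3)
print zMean.shape                            # (3, 2): one column per zone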
def get_randomized_group_average_speed_profiles(profiles):
    stAvg = []
    unAvg = []
    stErr = []
    unErr = []
    for avgSpeeds, trialTypes in profiles:
        # Average speeds in bins for stable (0) vs unstable (1) trials
        stableTrials = np.where(trialTypes == 0)
        unstableTrials = np.where(trialTypes == 1)
        mSt = stats.nanmean(avgSpeeds[stableTrials, :], 1)
        mUn = stats.nanmean(avgSpeeds[unstableTrials, :], 1)
        eSt = stats.nanstd(avgSpeeds[stableTrials, :], 1) / np.sqrt(np.size(stableTrials) - 1)
        eUn = stats.nanstd(avgSpeeds[unstableTrials, :], 1) / np.sqrt(np.size(unstableTrials) - 1)
        mSt = mSt[0]
        mUn = mUn[0]
        eSt = eSt[0]
        eUn = eUn[0]
        stAvg.append(mSt)
        unAvg.append(mUn)
        stErr.append(eSt)
        unErr.append(eUn)
    return (stAvg, stErr), (unAvg, unErr)
def despike(self, n=3, recursive=False, verbose=False):
    """
    Replace spikes with np.NaN.

    A spike is any value that is >= n * std from the mean (default n=3).
    """
    result = self.values.copy()
    outliers = (np.abs(self.values - nanmean(self.values)) >=
                n * nanstd(self.values))
    removed = np.count_nonzero(outliers)
    result[outliers] = np.NaN
    if verbose and not recursive:
        print("Removing from %s\n # removed: %s" % (self.name, removed))
    counter = 0
    if recursive:
        while outliers.any():
            result[outliers] = np.NaN
            outliers = np.abs(result - nanmean(result)) >= n * nanstd(result)
            counter += 1
            removed += np.count_nonzero(outliers)
        if verbose:
            print("Removing from %s\nNumber of iterations: %s # removed: %s"
                  % (self.name, counter, removed))
    return Series(result, index=self.index, name=self.name)
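# The same spike criterion as a standalone helper; the despike above assumes
# it is a method bound to a pandas-Series-like object, so this free-function
# sketch applies the rule to a plain array instead:
import numpy as np

def despike_array(values, n=3):
    # flag values >= n standard deviations from the mean and NaN them out
    values = np.asarray(values, dtype=float).copy()
    outliers = np.abs(values - np.nanmean(values)) >= n * np.nanstd(values)
    values[outliers] = np.nan
    return values

print despike_array([1.0] * 9 + [100.0], n=2)   # the 100.0 becomes nan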
def reasonableIntervalProcessing(self, data):
    databackup = data[:]  #keep backup in case all intervals are deleted
    random.seed(1)
    resampledData = [random.choice(data) for x in xrange(2000)]
    (resampLower, resampUpper) = zip(*resampledData)
    resampInterval = map(lambda x: x[1] - x[0], resampledData)
    meanLower = nanmean(resampLower)
    stdLower = nanstd(resampLower) * sqrt(len(data))  # it appears *sqrt is done to estimate population std from sample
    meanUpper = nanmean(resampUpper)
    stdUpper = nanstd(resampUpper) * sqrt(len(data))  # ditto
    meanInterval = nanmean(resampInterval)
    stdInterval = nanstd(resampInterval) * sqrt(len(data))  # ditto
    if stdLower + stdUpper == 0:
        barrier = (meanLower + meanUpper) / 2
        print "barrierAvg", barrier
    elif stdLower == 0:
        barrier = meanLower + .5
        print "barrierlower", barrier
    elif stdUpper == 0:
        barrier = meanUpper - .5
        print "barrierupper", barrier
    else:
        # barrier1/barrier2 are the two intersection points of the Gaussians
        # N(meanLower, stdLower) and N(meanUpper, stdUpper)
        barrier1 = (-(meanLower * stdUpper**2 - meanUpper * stdLower**2) +
                    stdLower * stdUpper * sqrt(
                        (meanLower - meanUpper)**2 +
                        2 * (stdLower**2 - stdUpper**2) *
                        log(stdLower / stdUpper))) / (stdLower**2 - stdUpper**2)
        barrier2 = (-(meanLower * stdUpper**2 - meanUpper * stdLower**2) -
                    stdLower * stdUpper * sqrt(
                        (meanLower - meanUpper)**2 +
                        2 * (stdLower**2 - stdUpper**2) *
                        log(stdLower / stdUpper))) / (stdLower**2 - stdUpper**2)
        if barrier1 >= meanLower and barrier1 <= meanUpper:
            barrier = barrier1
            print "barrier1", barrier
        else:
            barrier = barrier2
            print "barrier2", barrier
    for (l, u) in data[:]:
        try:
            #if l > barrier+(.1*stdLower) or u < barrier-(.1*stdUpper):
            #if l > barrier+stdLower or u < barrier-stdUpper:
            #if l > barrier and u < barrier:
            if l > barrier + (.001 * stdLower) or u < barrier - (.001 * stdUpper):
                raise ValueError(
                    "Unreasonable: interval %s does not cross reasonable barrier %s"
                    % (str((l, u)), str(barrier)), (l, u))
        except ValueError as (e, d):
            #print e
            #print "Unreasonable: removing data point %s" % str(d)
            data.remove(d)
def calc_clipped_stats_old(data, clip=3.0, nIter=10):
    """Calculate the mean and stdev of an array given a sigma clip"""
    data = np.array(data).flatten()
    mean = float(stats.nanmean(data))
    median = float(stats.nanmedian(data))
    std = float(stats.nanstd(data))
    mad = float(MAD(data))
    npix = np.sum(np.where(np.isnan(data), 0.0, 1.0))
    if clip > 0.0:
        convergeFlg = 0
        itCnt = 0
        while convergeFlg == 0 and itCnt < nIter:
            meanOld, stdOld, madOld = mean, std, mad
            minVal = mean - (clip * mad)
            maxVal = mean + (clip * mad)
            # Blank values outside the clip*mad range
            dataMsk = np.where(np.greater(data, maxVal), np.nan, data)
            dataMsk = np.where(np.less(data, minVal), np.nan, dataMsk)
            # Measure the statistics
            mean = stats.nanmean(dataMsk)
            median = stats.nanmedian(dataMsk)
            std = stats.nanstd(dataMsk)
            mad = MAD(dataMsk)
            npix = np.sum(np.where(np.isnan(dataMsk), 0.0, 1.0))
            dataMsk = []
            if mean == meanOld and mad == madOld:
                convergeFlg = 1  # was 'convergFlg', a typo that prevented convergence
            itCnt += 1
    # Assemble the measurements into a dictionary
    m = {}
    m['mean'] = float(mean)
    m['median'] = float(median)
    m['stdev'] = float(std)
    m['madfm'] = float(mad)
    m['npix'] = int(npix)
    m['max'] = float(np.nanmax(data))
    m['min'] = float(np.nanmin(data))
    del data
    # If all nans
    if m['npix'] == 0:
        m['stdev'] = 0.0
        m['mean'] = 0.0
        m['median'] = 0.0
        m['max'] = 0.0
        m['min'] = 0.0
        m['centmax'] = (0.0, 0.0)
        m['madfm'] = 0.0
        m['success'] = False
    else:
        m['success'] = True
    return m
def toleranceLimitProcessing(self, data):
    # Tolerance limit processing
    random.seed(1)
    resampledData = [random.choice(data) for x in xrange(2000)]
    #address default values:
    #    middle = nanmean([(d[1]+d[0])/2 for d in data])/(self.r[1]-self.r[0])
    #    print "middle", middle
    #    if(middle < .35):
    #        print "filtering higher range"
    #        f = lambda x: x[1] != 100 or random.random() > .3
    #        resampledData = filter(f, resampledData)
    #    if(middle > .65*(self.r[1]-self.r[0])):
    #        print "filtering higher range"
    #        f = lambda x: x[0] != 0 or random.random() > .3
    #        resampledData = filter(f, resampledData)
    #    f = lambda x: (x[0] != 0 and x[1]!=100) or random.random() > .1
    #    resampledData = filter(f, resampledData)
    #    print "resampled data length", len(resampledData)
    (resampLower, resampUpper) = zip(*resampledData)
    resampInterval = map(lambda x: x[1] - x[0], resampledData)
    meanLower = nanmean(resampLower)
    stdLower = nanstd(resampLower) * sqrt(len(data))  # it appears *sqrt is done to estimate population std from sample
    meanUpper = nanmean(resampUpper)
    stdUpper = nanstd(resampUpper) * sqrt(len(data))  # ditto
    meanInterval = nanmean(resampInterval)
    stdInterval = nanstd(resampInterval) * sqrt(len(data))  # ditto
    K = [32.019, 32.019, 8.380, 5.369, 4.275, 3.712, 3.369, 3.136, 2.967,
         2.839, 2.737, 2.655, 2.587, 2.529, 2.48, 2.437, 2.4, 2.366, 2.337,
         2.31, 2.31, 2.31, 2.31, 2.31, 2.208]  # taken from Liu/Mendel matlab code, in turn from Walpole, Myers, Myers, Ye 2008
    k = K[min(len(data), 24)]
    acceptableLower = (meanLower - k * stdLower, meanLower + k * stdLower)
    acceptableUpper = (meanUpper - k * stdUpper, meanUpper + k * stdUpper)
    acceptableInterval = (meanInterval - k * stdInterval,
                          meanInterval + k * stdInterval)
    for (l, u) in data[:]:
        try:
            if not acceptableLower[0] <= l <= acceptableLower[1]:
                raise ValueError("Intolerable: lower bound %s not in %s"
                                 % (str(l), str(acceptableLower)), (l, u))
            if not acceptableUpper[0] <= u <= acceptableUpper[1]:
                raise ValueError("Intolerable: upper bound %s not in %s"
                                 % (str(u), str(acceptableUpper)), (l, u))
            if not acceptableInterval[0] <= u - l <= acceptableInterval[1]:
                raise ValueError("Intolerable: interval %s not in %s"
                                 % (str(u - l), str(acceptableInterval)), (l, u))
        except ValueError as (e, d):
            #print e
            #print "Intolerable: removing data point %s" % str(d)
            data.remove(d)
def reasonableIntervalProcessing(self, data):
    databackup = data[:]  #keep backup in case all intervals are deleted
    random.seed(1)
    resampledData = [random.choice(data) for x in xrange(2000)]
    (resampLower, resampUpper) = zip(*resampledData)
    resampInterval = map(lambda x: x[1] - x[0], resampledData)
    meanLower = nanmean(resampLower)
    stdLower = nanstd(resampLower) * sqrt(len(data))  # it appears *sqrt is done to estimate population std from sample
    meanUpper = nanmean(resampUpper)
    stdUpper = nanstd(resampUpper) * sqrt(len(data))  # ditto
    meanInterval = nanmean(resampInterval)
    stdInterval = nanstd(resampInterval) * sqrt(len(data))  # ditto
    if stdLower + stdUpper == 0:
        barrier = (meanLower + meanUpper) / 2
        print "barrierAvg", barrier
    elif stdLower == 0:
        barrier = meanLower + .5
        print "barrierlower", barrier
    elif stdUpper == 0:
        barrier = meanUpper - .5
        print "barrierupper", barrier
    else:
        barrier1 = ((meanUpper * stdLower**2 - meanLower * stdUpper**2) +
                    stdLower * stdUpper * sqrt(
                        (meanLower - meanUpper)**2 +
                        2 * (stdLower**2 - stdUpper**2) *
                        log(stdLower / stdUpper))) / (stdLower**2 - stdUpper**2)
        barrier2 = ((meanUpper * stdLower**2 - meanLower * stdUpper**2) -
                    stdLower * stdUpper * sqrt(
                        (meanLower - meanUpper)**2 +
                        2 * (stdLower**2 - stdUpper**2) *
                        log(stdLower / stdUpper))) / (stdLower**2 - stdUpper**2)
        print "barrier1", barrier1
        print "barrier2", barrier2
        if barrier1 >= meanLower and barrier1 <= meanUpper:
            barrier = barrier1
            print "barrier1", barrier
        #elif barrier2 >= meanLower and barrier1 <= meanUpper:
        else:
            barrier = barrier2
            print "barrier2", barrier
    for (l, u) in data[:]:
        try:
            #if l > barrier+(.1*stdLower) or u < barrier-(.1*stdUpper):
            #if l > barrier+stdLower or u < barrier-stdUpper:
            #if l > barrier and u < barrier:
            #if l > barrier+(.001*stdLower) or u < barrier-(.001*stdUpper):
            if not (2*meanLower - barrier) <= l <= barrier <= u <= (2*meanUpper - barrier):
                raise ValueError(
                    "Unreasonable: interval %s does not cross reasonable barrier %s"
                    % (str((l, u)), str(barrier)), (l, u))
        except ValueError as (e, d):
            #print e
            #print "Unreasonable: removing data point %s" % str(d)
            data.remove(d)
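# The closed-form barrier in reasonableIntervalProcessing is the point where
# the two normal densities fitted to the lower and upper bounds intersect.
# A small self-check with made-up means/stds (the minus root is the one that
# falls between the two means in this example):
from math import sqrt, log
from scipy.stats import norm

meanLower, stdLower = 2.0, 1.0
meanUpper, stdUpper = 8.0, 2.0
barrier = ((meanUpper * stdLower**2 - meanLower * stdUpper**2) -
           stdLower * stdUpper * sqrt(
               (meanLower - meanUpper)**2 +
               2 * (stdLower**2 - stdUpper**2) * log(stdLower / stdUpper))
           ) / (stdLower**2 - stdUpper**2)
print norm.pdf(barrier, meanLower, stdLower)   # the two densities agree...
print norm.pdf(barrier, meanUpper, stdUpper)   # ...at the intersection point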
def return_speedup_stats(x, y):
    speedup_stats = {
        'ratio_of_the_means': stats.nanmean(x) / stats.nanmean(y),
        'ratio_of_the_medians': stats.nanmedian(x) / stats.nanmedian(y),
        'ratio_of_the_stddevs': stats.nanstd(x) / stats.nanstd(y),
        'ratio_max_to_min': np.amax(x) / np.amin(y),
        'ratio_min_to_max': np.amin(x) / np.amax(y)
    }
    return speedup_stats
def calc_perf_stats(self):
    """Calculates mean performance based on trimmed time series."""
    self.mean_tsr, self.std_tsr = nanmean(self.tsr), nanstd(self.tsr)
    self.mean_cp, self.std_cp = nanmean(self.cp), nanstd(self.cp)
    self.mean_cd, self.std_cd = nanmean(self.cd), nanstd(self.cd)
    self.mean_ct, self.std_ct = nanmean(self.ct), nanstd(self.ct)
    self.mean_u_enc = nanmean(self.tow_speed)
    self.mean_tow_speed = self.mean_u_enc
    self.std_u_enc = nanstd(self.tow_speed)
    self.std_tow_speed = self.std_u_enc
def removeoutliers(inarray, stdcut=3.0):
    #bonehead outlier cut: stdcut is how many sigma; replace outliers with
    #the value at the nearest good (non-outlier) index
    #first zero out the non-finite numbers
    inarray[np.logical_not(np.isfinite(inarray))] = 0.
    indexarray = np.arange(len(inarray))
    badi = indexarray[np.abs(inarray - nanmedian(inarray)) > stdcut * nanstd(inarray)]
    goodi = indexarray[np.abs(inarray - nanmedian(inarray)) <= stdcut * nanstd(inarray)]
    outarray = inarray.copy()
    for i in badi:
        # nearest-neighbor replacement: find the closest good index, then
        # take the value there (the original indexed inarray with the
        # *position* within goodi, which grabbed the wrong element)
        outarray[i] = inarray[goodi[np.abs(goodi - i).argmin()]]
    return outarray
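# A quick sanity check of the nearest-neighbor outlier replacement above,
# assuming nanmedian/nanstd are the old scipy.stats helpers (or equivalent
# shims) imported elsewhere in this file:
import numpy as np

arr = np.array([1.0, 1.2, 0.9, 42.0, 1.1, 1.0])
print removeoutliers(arr.copy(), stdcut=2.0)
# the 42.0 is replaced by the value at its nearest good index, 0.9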
def extract_pigments(f): ''' extract pigments data of *.txt files, and return a list of dictionaries. INPUT ----- f : str or pandas object string of specific .csv file OUTPUT ------ var: list of dictionaries, containing pigments and informations. ''' stringy = str(f) term = stringy[stringy.rfind('.'):stringy.rfind('.')+4] if term == '.csv': dat = np.genfromtxt(stringy, names=True, dtype=None, delimiter=',') print(dat) lista = dat.dtype.names else: dat = f lista = dat.keys() dicts = [] nd = {} parse = ['station','treatment','time'] for k in lista: if k in parse: continue nd[k] = [] nd['name'] = k nd['local'] = dat['station'][0] nd['ct0'] = dat[k][(dat['treatment']=='Initial')] nd['ct1'] = dat[k][(dat['treatment']=='Control') & (dat['time']=='T1')] nd['ct2'] = dat[k][(dat['treatment']=='Control') & (dat['time']=='T2')] nd['ft0'] = dat[k][(dat['treatment']=='Initial')] nd['ft1'] = dat[k][(dat['treatment']=='Fe') & (dat['time']=='T1')] nd['ft2'] = dat[k][(dat['treatment']=='Fe') & (dat['time']=='T2')] nd['dt0'] = dat[k][(dat['treatment']=='Initial')] nd['dt1'] = dat[k][(dat['treatment']=='DFA') & (dat['time']=='T1')] nd['dt2'] = dat[k][(dat['treatment']=='DFA') & (dat['time']=='T2')] nd['xcontrol'] = np.append(nanmean(nd['ct0']),(nanmean(nd['ct1']), nanmean(nd['ct2']))) nd['xferro'] = np.append(nanmean(nd['ft0']),(nanmean(nd['ft1']), nanmean(nd['ft2']))) nd['xdfa'] = np.append(nanmean(nd['dt0']),(nanmean(nd['dt1']), nanmean(nd['dt2']))) nd['econtrol'] = np.append(nanstd(nd['ct0']),(nanstd(nd['ct1']), nanstd(nd['ct2']))) nd['eferro'] = np.append(nanstd(nd['ft0']),(nanstd(nd['ft1']), nanstd(nd['ft2']))) nd['edfa'] = np.append(nanstd(nd['dt0']),(nanstd(nd['dt1']), nanstd(nd['dt2']))) if nd: dicts.append(nd) nd = {} return dicts
def make_plots(self, num_bins=50):
    import matplotlib.pyplot as p
    ## Histogram of Widths
    widths = [float(x) for x in self.dataframe["Widths"] if is_float_try(x)]
    widths_stats = [nanmean(widths), nanstd(widths), nanmedian(widths)]
    ## Histogram of Lengths
    lengths = self.dataframe["Lengths"]
    lengths_stats = [nanmean(lengths), nanstd(lengths), nanmedian(lengths)]
    ## Histogram of Curvature
    rht_curvature = self.dataframe["RHT Curvature"]
    rht_curvature_stats = [nanmean(rht_curvature), nanstd(rht_curvature),
                           nanmedian(rht_curvature)]
    if self.verbose:
        print "Widths Stats: %s" % (widths_stats)
        print "Lengths Stats: %s" % (lengths_stats)
        print "Curvature Stats: %s" % (rht_curvature_stats)
    p.subplot(131)
    p.hist(widths, num_bins)
    p.xlabel("Widths (pc)")
    p.subplot(132)
    p.hist(lengths, num_bins)
    p.xlabel("Lengths (pc)")
    p.subplot(133)
    p.hist(rht_curvature, num_bins)  # was 'curvature', an undefined name
    p.xlabel("Curvature")
    p.show()
    if self.save:
        p.hist(widths, num_bins)
        p.xlabel("Widths (pc)")
        p.savefig("".join([self.save_name, "_widths.pdf"]))
        p.close()
        p.hist(lengths, num_bins)
        p.xlabel("Lengths (pc)")
        p.savefig("".join([self.save_name, "_lengths.pdf"]))
        p.close()
        p.hist(rht_curvature, num_bins)
        p.xlabel("RHT Curvature")
        p.savefig("".join([self.save_name, "_rht_curvature.pdf"]))
        p.close()
    return self
def plot_randomized_speed_profiles(avgSpeeds, trialTypes): # Set Plotting Attributes color1 = (0.0, 0.0, 0.0, 0.1) color2 = (1.0, 0.6, 0.0, 0.1) color1b = (0.0, 0.0, 0.0, 1.0) color2b = (1.0, 0.6, 0.0, 1.0) traceColors = [color1, color2] boldColors = [color1b, color2b] # Plot Average Speeds in bins plt.figure() numTrials = np.size(trialTypes) for t in range(0, numTrials): if trialTypes[t] == 0: plt.plot(avgSpeeds[t, :], color=color1) else: plt.plot(avgSpeeds[t, :], color=color2) stableTrials = np.where(trialTypes == 0) unstableTrials = np.where(trialTypes == 1) mSt = stats.nanmean(avgSpeeds[stableTrials, :], 1) mUn = stats.nanmean(avgSpeeds[unstableTrials, :], 1) eSt = stats.nanstd(avgSpeeds[stableTrials, :], 1) / np.sqrt(np.size(stableTrials) - 1) eUn = stats.nanstd(avgSpeeds[unstableTrials, :], 1) / np.sqrt(np.size(unstableTrials) - 1) # eSt = stats.nanstd(avgSpeeds[stableTrials, :], 1) # eUn = stats.nanstd(avgSpeeds[unstableTrials, :], 1) mSt = mSt[0] mUn = mUn[0] eSt = eSt[0] eUn = eUn[0] plt.plot(mUn, color=color2b, linewidth=7) plt.plot(mSt, color=color1b, linewidth=7) # plt.plot(mSt + eSt, color=color1b, linewidth = 0.5) # plt.plot(mSt - eSt, color=color1b, linewidth = 0.5) # plt.plot(mUn + eUn, color=color2b, linewidth = 0.5) # plt.plot(mUn - eUn, color=color2b, linewidth = 0.5) #pltutils.fix_font_size() plt.xlabel('crossing extent (cm)') plt.ylabel('normalized horizontal speed') pltutils.fix_font_size() plt.axis([0, 39, 0, 3])
def gen_exp_analysis_2d(f, key = 'rrtf_noise', key_dtype = '7f4'): db = h5_to_numpy(f, [key, 'rrs'], [key_dtype, '7f4']) fig = plt.figure() ax = fig.add_subplot(111) gens = ['ko', 'wt'] exps = ['nai', 'exp'] pltopts = {'ko' : {'nai' : {'color' : 'r', 'ls' : '-'}, 'exp' : {'color' : 'r', 'ls' : '--'}}, 'wt' : {'nai' : {'color' : 'b', 'ls' : '-'}, 'exp' : {'color' : 'b', 'ls' : '--'}}} # t = np.arange(7) rrs = db['rrs'][0] leg = [] for i, gen in enumerate(gens): for j, exp in enumerate(exps): ix = np.vstack((db['gen']==gen, db['exp']==exp)).all(0) db_ = db[ix] nunits = db_.size # y = st.nanmean(db_[key], 0) yerr = st.nanstd(db_[key], 0) / np.sqrt(nunits) ax.errorbar(rrs, y, yerr = yerr, color = pltopts[gen][exp]['color'], ls = pltopts[gen][exp]['ls']) leg.append('-'.join((gen, exp))) ax.legend(leg) # ax.set_title('Evoked PSTHs') # ax.set_xlabel('Time (ms)') # ax.set_ylabel('Firing rate (spks/s)') plt.show()
def rate_startle_ratio(data, title = None, ax = None, show_all = True): if ax is None: fig = plt.figure(); ax = fig.add_subplot(111); freqs = np.unique(data['freq']) nfreqs = freqs.size rates = np.unique(data['rate']) nrates = rates.size animals = np.unique(data['animal']) nanimals = animals.size x = np.arange(nrates) ppi = np.empty((nfreqs, nrates, nanimals)) for f, freq in enumerate(freqs): for r, rate in enumerate(rates): for a, animal in enumerate(animals): dat_ = data[np.c_[data['freq']==freq, data['rate']==rate, data['animal']==animal].all(1)] ppi[f, r, a] = calc_rate_startle_ratio(dat_) ax.errorbar(x+nrates*f, st.nanmean(ppi[f, ...], 1), yerr = st.nanstd(ppi[f, ...], 1), lw = 3) if show_all: ax.plot(x+nrates*f, ppi[f, ...], color = '0.7') ax.set_xticks(np.arange(nfreqs*nrates)) ax.set_xticklabels(np.tile(rates, nfreqs)) ax.axhline(1, color = 'r', ls = '--') ax.set_ylabel('PPI') ax.set_xlabel('Rate (pps)') ax.set_title(title)
def get_total_task_counts(fluc_levels,fluc_type): for fluc_level in fluc_levels: task_counts_mean=[] task_counts_se=[] task_counts_sd=[] task_counts=[] for replicate in range(1,31): replicate_counts=[] tasks_for_replicate=get_file_lines("../data_"+str(fluc_type)+"_"+str(fluc_level)+"/replicate_"+str(replicate)+"/tasks.dat") for i in range(len(tasks_for_replicate)): if len(tasks_for_replicate[i])!=0 and tasks_for_replicate[i][0]!="#": temp=str(tasks_for_replicate[i]).split(" ") update_task_count=0 for j in range(1,10): update_task_count+=float(temp[j]) replicate_counts+=[update_task_count] assert len(replicate_counts)==500000/50+1,""+str(len(replicate_counts)) task_counts+=[copy.deepcopy(replicate_counts)] assert len(task_counts)==30,""+str(len(task_counts)) for update in range(0,400001,50): update_data=[float(task_counts[i][update/50]) for i in range(30)] task_counts_mean+=[stats.nanmean(update_data)] task_counts_se+=[stats.sem(update_data)] task_counts_sd+=[stats.nanstd(update_data)] pickle.dump(task_counts_mean,open("../plot_data/total_task_counts_mean_"+str(fluc_type)+"_"+str(fluc_level)+".data","wb")) pickle.dump(task_counts_se,open("../plot_data/total_task_counts_se_"+str(fluc_type)+"_"+str(fluc_level)+".data","wb")) pickle.dump(task_counts_sd,open("../plot_data/total_task_counts_sd_"+str(fluc_type)+"_"+str(fluc_level)+".data","wb")) return "success"
def bar_by_indep_2d(dep_key, indep_key, data, ax = None, bins = None, color = 'b', show_all = False): x = np.asarray(data[indep_key]) y = np.asarray(data[dep_key]) if bins is None: x_bin = x else: x_bin = misc.bin(x, bins) bins = np.unique(x_bin) nbins = bins.size y_mean = np.empty(nbins) y_sem = np.empty(nbins) for i in range(nbins): y_ = y[x_bin == bins[i]] y_mean[i] = st.nanmean(y_) y_sem[i] = st.nanstd(y_) / np.sqrt(y_.size) if ax is None: fig = plt.figure(); ax = fig.add_subplot(111); if show_all: ax.scatter(x, y, color = color, alpha = 0.25) lw = 2 else: lw = 1 ax.errorbar(bins, y_mean, yerr = y_sem, color = color, lw = lw) ax.set_xlim([bins[0]-1, bins[-1]+1]) plt.show()
def gap_startle_ratio(data, title = None, ax = None, show_all = True, animals = None): if ax is None: fig = plt.figure(); ax = fig.add_subplot(111); freqs = np.unique(data['freq']) nfreqs = freqs.size gaps = np.unique(data['gap']) ngaps = gaps.size if animals is None: animals = np.unique(data['animal']) nanimals = animals.size x = np.arange(ngaps) ppi = np.empty((nfreqs, ngaps, nanimals)) for f, freq in enumerate(freqs): for a, animal in enumerate(animals): dat = data[np.c_[data['animal']==animal, data['freq']==freq].all(1)] basestartle = dat[dat['gap']==0]['maxstartle'].mean() for r, gap in enumerate(gaps): dat_ = dat[dat['gap']==gap]['maxstartle'] ppi[f, r, a] = dat_.mean() / basestartle ax.errorbar(x+ngaps*f, st.nanmean(ppi[f, ...], 1), yerr = st.nanstd(ppi[f, ...], 1), lw = 3) if show_all: ax.plot(x+ngaps*f, ppi[f, ...], color = '0.7') ax.set_xticks(np.arange(nfreqs*ngaps)) ax.set_xticklabels(np.tile(gaps, nfreqs)) ax.axhline(1, color = 'r', ls = '--') ax.set_ylabel('PPI') ax.set_xlabel('Gap duration (s)') ax.set_title(title)
def get_count(data_category,specific_data,fluc_levels,fluc_type): assert data_category in ["resource","tasks"] assert type(fluc_levels)==list assert type(fluc_type)==str assert fluc_type in ["sync","stag","lowhigh"] assert specific_data>=0 assert specific_data<=8 for fluc_level in fluc_levels: treatment_counts_mean=[] treatment_counts_se=[] treatment_counts_sd=[] treatment_counts=[] for replicate in range(1,31): replicate_counts=[] data_for_replicate=get_file_lines("../data_"+str(fluc_type)+"_"+str(fluc_level)+"/replicate_"+str(replicate)+"/"+str(data_category)+".dat") for i in range(len(data_for_replicate)): if len(data_for_replicate[i])!=0 and data_for_replicate[i][0]!="#": temp=str(data_for_replicate[i]).split(" ") update_count=float(temp[specific_data+1]) replicate_counts+=[update_count] assert len(replicate_counts)==500000/50+1,""+str(len(replicate_counts)) treatment_counts+=[copy.deepcopy(replicate_counts)] assert len(treatment_counts)==30,""+str(len(treatment_counts)) for update in range(0,400001,50): update_data=[float(treatment_counts[i][update/50]) for i in range(30)] treatment_counts_mean+=[stats.nanmean(update_data)] treatment_counts_se+=[stats.sem(update_data)] treatment_counts_sd+=[stats.nanstd(update_data)] pickle.dump(treatment_counts_mean,open("../plot_data/"+str(data_category)+"_"+str(specific_data)+"_counts_mean_"+str(fluc_type)+"_"+str(fluc_level)+".data","wb")) pickle.dump(treatment_counts_se,open("../plot_data/"+str(data_category)+"_"+str(specific_data)+"_counts_se_"+str(fluc_type)+"_"+str(fluc_level)+".data","wb")) pickle.dump(treatment_counts_sd,open("../plot_data/"+str(data_category)+"_"+str(specific_data)+"_counts_sd_"+str(fluc_type)+"_"+str(fluc_level)+".data","wb")) return "success"
def nanste(array, axis):
    """ Function that computes standard error accounting for NaN's """
    err = stats.nanstd(array, axis=axis) / np.sqrt(nanlen(array, axis))
    return err
def nin_get(self, url='http://www.cpc.ncep.noaa.gov/data/indices/sstoi.indices',
            save=None, csvout='nin.csv'):
    """
    read NIN data from url and return a pandas dataframe

    @param url: url to data online, default is set to:
                http://www.cpc.ncep.noaa.gov/data/indices/sstoi.indices
    @param save: directory where to save raw data as csv
    @return: nindata as pandas dataframe
    """
    try:
        ts_raw = pd.read_table(url, sep=' ', header=0, skiprows=0,
                               parse_dates=[['YR', 'MON']],
                               skipinitialspace=True, index_col=0,
                               date_parser=parse)
        print 'dataset used: %s' % url  # was "'... %s', url", which printed a tuple
        ts_year_group = ts_raw.groupby(lambda x: x.year).apply(
            lambda sdf: sdf if len(sdf) > 11 else None)
        ts_range = pd.date_range(ts_year_group.index[0][1],
                                 ts_year_group.index[-1][1] + pd.DateOffset(months=1),
                                 freq="M")
        ts = pd.DataFrame(ts_year_group.values, index=ts_range,
                          columns=ts_year_group.keys())
        ts_fullyears_group = ts.groupby(lambda x: x.year)
        nin_anomalies = ((ts_fullyears_group.mean()['ANOM.3'] -
                          sts.nanmean(ts_fullyears_group.mean()['ANOM.3'])) /
                         sts.nanstd(ts_fullyears_group.mean()['ANOM.3']))
        nin_anomalies = pd.DataFrame(
            nin_anomalies.values,
            index=pd.to_datetime([str(x) for x in nin_anomalies.index]))
        nin_anomalies = nin_anomalies.rename(columns={'0': 'nin'})
        nin_anomalies.columns = ['nin']
        if save:
            eu.ensure_dir(save)
            output = os.path.join(save, csvout)
            nin_anomalies.to_csv(output, sep=',', header=True, index=True,
                                 index_label='Date')
            print 'data saved as', output
        return nin_anomalies
    except IOError:
        print 'unable to fetch the data, check if %s is a valid address and the data conforms to the AMO spec; for info about the data spec see [1]' % url
def calc_stats_old(a, maskzero=False):
    """Calculate the statistics of an array"""
    statsDict = {}
    a = np.array(a)
    if maskzero:
        a = np.where(np.equal(a, 0.0), np.nan, a)
    # Check that array is not all NaNs
    statsDict['npix'] = int(np.sum(np.where(np.isnan(a), 0.0, 1.0)))
    if statsDict['npix'] >= 2:
        statsDict['stdev'] = float(stats.nanstd(a.flatten()))
        statsDict['mean'] = float(stats.nanmean(a.flatten()))
        statsDict['median'] = float(stats.nanmedian(a.flatten()))
        statsDict['max'] = float(np.nanmax(a))
        statsDict['min'] = float(np.nanmin(a))
        statsDict['centmax'] = list(np.unravel_index(np.nanargmax(a), a.shape))
        statsDict['madfm'] = float(MAD(a.flatten()))
        statsDict['success'] = True
    else:
        statsDict['npix'] = 0  # was '==', a no-op comparison
        statsDict['stdev'] = 0.0
        statsDict['mean'] = 0.0
        statsDict['median'] = 0.0
        statsDict['max'] = 0.0
        statsDict['min'] = 0.0
        statsDict['centmax'] = (0.0, 0.0)
        statsDict['madfm'] = 0.0
        statsDict['success'] = False
    return statsDict
def calc_stats(a, maskzero=False):
    statsDict = {}
    a = np.array(a)
    if maskzero:
        a = np.where(np.equal(a, 0.0), np.nan, a)
    # Check that array is not all NaNs
    statsDict['npix'] = int(np.sum(np.where(np.isnan(a), 0.0, 1.0)))
    if statsDict['npix'] >= 2:
        statsDict['stdev'] = float(stats.nanstd(a.flatten()))
        statsDict['mean'] = float(stats.nanmean(a.flatten()))
        statsDict['median'] = float(stats.nanmedian(a.flatten()))
        statsDict['max'] = float(np.nanmax(a))
        statsDict['min'] = float(np.nanmin(a))
        statsDict['centmax'] = list(np.unravel_index(np.nanargmax(a), a.shape))
        statsDict['madfm'] = float(MAD(a.flatten()))
        statsDict['success'] = True
    else:
        statsDict['npix'] = 0  # was '==', a no-op comparison
        statsDict['stdev'] = 0.0
        statsDict['mean'] = 0.0
        statsDict['median'] = 0.0
        statsDict['max'] = 0.0
        statsDict['min'] = 0.0
        statsDict['centmax'] = (0.0, 0.0)
        statsDict['madfm'] = 0.0
        statsDict['success'] = False
    return statsDict
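# MAD (the median absolute deviation from the median, 'madfm' above) is used
# by several of these stats helpers but is not defined in this section. A
# minimal sketch consistent with how it is called; whether the original
# applies the ~1.4826 Gaussian-consistency factor is an assumption left out
# here:
import numpy as np
from scipy import stats

def MAD(a):
    # median absolute deviation from the median, ignoring NaNs;
    # multiply by ~1.4826 to estimate sigma for Gaussian noise
    m = stats.nanmedian(a)
    return stats.nanmedian(np.abs(a - m))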
def plot_wav(wav_file, fignum): f, axarr = plt.subplots(2, 1, False, False, False, num=fignum) (signal, rate) = load_wav_as_mono(wav_file) energy = gen_log_energy_array(signal, rate) filename = os.path.basename(wav_file) (energy_silent_points, energy_inv_silent_points) = find_silent_moments(energy) log_signal = signal * 10 log_signal = np.log10(signal) chunk_size = 50 chunks = chunkyfy(energy, chunk_size) mean_filtered = stats.nanmean(chunks, axis=1) min_filtered = np.nanmin(chunks, axis=1) max_filtered = np.nanmax(chunks, axis=1) std_filtered = stats.nanstd(chunks, axis=1) x = np.linspace(chunk_size / 2, energy.size - chunk_size / 2, min_filtered.size) ax = axarr[0, 0] ax.plot(energy, linewidth=0.4, color='gray') ax.plot(x, mean_filtered, color='b', linewidth=0.4) ax.set_title(filename) plot_split_points(ax, energy_silent_points, energy_inv_silent_points) ax = axarr[1, 0] plot_split_points(ax, energy_silent_points, energy_inv_silent_points) ax.plot(x, std_filtered, color='g', linewidth=0.4) return f
def rms_f(self, x):
    """Compute standard deviation over time varying axis of a
    front relative quantity, x.
    """
    # TODO: the axis used in nanmean is different for U and Uf
    # calcs - change Uf dims to make consistent?
    return stats.nanstd(x, axis=1)
def anlstd(data):
    import numpy as np
    from scipy import stats
    year = []
    for month in np.arange(0, 12):
        year.append(stats.nanstd(data[month::12]))
    return np.asarray(year)
def calc_sample_loglik(gam_unit, family="poisson"):
    r"""
    Calculate log likelihood loss function for Poisson or Gaussian
    distributed data.

    Log likelihood is defined as:

    .. math:: L(Y, \theta(X)) = -2 \cdot \log \text{Pr}_{\theta(X)}(Y)

    where :math:`\theta(X)` is the prediction and :math:`Y` is the actual
    data. From Hastie, Tibshirani, and Friedman, 2nd ed, 5th print, eq. 7.8.
    This is the probability of seeing the data, given the prediction.

    For Poisson, :math:`\text{Pr}_{\theta(X)}(Y)` is given by
    :math:`pmf(Y, f(X))` and for Gaussian, by :math:`pdf(Y, f(X))`.
    """
    assert family in ["poisson", "gaussian"]
    if family == "poisson":
        # only needs mean shape parameter
        Pr = -2 * stats.poisson.logpmf(gam_unit.actual, gam_unit.pred)
    else:
        # normal needs means and variance
        # shape handling here calculates average across repeats,
        # but keeps that dimension for broadcasting purposes
        pred_std = stats.nanstd(gam_unit.pred, axis=2)[:, :, None]
        Pr = -2 * stats.norm.logpdf(gam_unit.actual, gam_unit.pred, pred_std)
    # Pr now has shape (nmod, ntask, nrep, nunit, nbin)
    # get one number per model...
    return get_mean_sem_of_samples(Pr)
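# A tiny worked check of the -2*log-likelihood loss used above, for the
# Poisson case (the counts and rates here are made up for illustration):
import numpy as np
from scipy import stats

actual = np.array([2, 0, 5])      # observed counts Y
pred = np.array([2.5, 0.5, 4.0])  # predicted rates theta(X)
loss = -2 * stats.poisson.logpmf(actual, pred)
print loss         # elementwise -2*log Pr; smaller means a better fit
print loss.mean()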
def compFanoFactor(self, time_range=[], pop_id='all', nmax=100):
    ''' Compute the fano factor for the spike trains given in sp'''
    self.load_spikes()
    spikes = self.events['spikes']
    if len(spikes) == 0:
        print 'compFanoFactor: spike array is empty !'
        return np.nan
    pop_id, spikes, neuron_nr = self.get_pop_spikes(spikes, nmax, pop_id)
    if len(spikes) == 0:
        return np.nan
    if time_range != []:
        idx = (spikes[:, 1] > time_range[0]) & (spikes[:, 1] <= time_range[1])
        spikes = spikes[idx]
    if time_range == []:
        total_time = self.pars['T_total']
    else:
        total_time = time_range[1] - time_range[0]
    ids = np.unique(spikes[:, 0])[:nmax]
    counts = np.zeros((len(ids),))
    for i in np.arange(len(ids)):
        # count spikes for neuron ids[i]; indexing by the loop counter i
        # (as the original did) only works if ids happens to be 0..len(ids)-1
        counts[i] = len(spikes[spikes[:, 0] == ids[i], :])
    FF = (st.nanstd(counts))**2 / st.nanmean(counts)
    return FF
def bar_by_indep_2d(dep_key, indep_key, data, visible = True, ax = None, color = 'b', show_all = False, use_bar = False, **kwargs): ''' the 2D case (i.e. independent is RR, dependent is RRTF) ''' if type(indep_key) is str: x = data[0][indep_key] else: x = indep_key y = data[dep_key] nbins = x.size y_means = st.nanmean(y, 0) y_sems = st.nanstd(y, 0) / np.sqrt(y.shape[0]) if visible: if ax is None: fig = plt.figure(); ax = fig.add_subplot(111); if show_all: ax.plot(x, y.T, 'gray') if use_bar: line, = ax.bar(x, y_means, yerr = y_sems, color = color, **kwargs) else: line, _, _ = ax.errorbar(x, y_means, yerr = y_sems, lw = 2, color = color, **kwargs) plt.show() return line, ax
def get_final_ecotype_num_data(resource_levels,type_data): for resource_level in resource_levels: ecotype_mean=0 ecotype_se=0 ecotype_sd=0 ecotype_counts=[] for replicate in range(1,31): if type_data=="phenotype": file_str="../data_"+str(resource_level)+"/phenotypes/phenotype_"+str(replicate)+".dat" elif type_data=="genotype": file_str="../data_"+str(resource_level)+"/genotypes_time/genotype_"+str(replicate)+".dat" else: raise StandardError("Incorrect Type Data Entry: "+str(type_data)) ecos_for_update=copy.deepcopy(get_ecotypes_for_update(file_str,type_data)) present_ecotypes=[] if type_data=="genotype": ecotype_counts+=[len(ecos_for_update)] elif type_data=="phenotype": for i in range(len(ecos_for_update)): if str(ecos_for_update[i][0]) not in present_ecotypes: present_ecotypes+=[str(ecos_for_update[i][0])] ecotype_counts+=[len(present_ecotypes)] ecotype_mean=stats.nanmean(ecotype_counts) ecotype_se=stats.sem(ecotype_counts) ecotype_sd=stats.nanstd(ecotype_counts) pickle.dump(ecotype_mean,open("../replication_plot_data/"+str(type_data)+"_nums_mean_"+str(resource_level)+".data","wb")) pickle.dump(ecotype_se,open("../replication_plot_data/"+str(type_data)+"_nums_se_"+str(resource_level)+".data","wb")) pickle.dump(ecotype_sd,open("../replication_plot_data/"+str(type_data)+"_nums_sd_"+str(resource_level)+".data","wb")) return "success"
def get_resource_counts(fluc_levels,fluc_type): for fluc_level in fluc_levels: resource_counts_mean=[[] for i in range(9)] resource_counts_se=[[] for i in range(9)] resource_counts_sd=[[] for i in range(9)] resource_counts=[] for replicate in range(1,31): replicate_counts=[[] for i in range(9)] resources_for_replicate=get_file_lines("../data_"+str(fluc_type)+"_"+str(fluc_level)+"/replicate_"+str(replicate)+"/resource.dat") for i in range(len(resources_for_replicate)): if len(resources_for_replicate[i])!=0 and resources_for_replicate[i][0]!="#": temp=str(resources_for_replicate[i]).split(" ") for j in range(1,10): replicate_counts[j-1]+=[temp[j]] assert len(replicate_counts[0])==500000/50+1,""+str(len(replicate_counts[0])) resource_counts+=[copy.deepcopy(replicate_counts)] assert len(resource_counts)==30,""+str(len(resource_counts)) for resource in range(9): for update in range(0,500001,50): update_data=[float(resource_counts[i][resource][update/50]) for i in range(30)] resource_counts_mean[resource]+=[stats.nanmean(update_data)] resource_counts_se[resource]+=[stats.sem(update_data)] resource_counts_sd[resource]+=[stats.nanstd(update_data)] pickle.dump(resource_counts_mean,open("../plot_data/resource_counts_mean_"+str(fluc_type)+"_"+str(fluc_level)+".data","wb")) pickle.dump(resource_counts_se,open("../plot_data/resource_counts_se_"+str(fluc_type)+"_"+str(fluc_level)+".data","wb")) pickle.dump(resource_counts_sd,open("../plot_data/resource_counts_sd_"+str(fluc_type)+"_"+str(fluc_level)+".data","wb")) return "success"
def getTraceAvg(dm, avgFunc=nanmean, **traceParams):
    """
    Gets a single average trace

    Arguments:
    dm           --  a DataMatrix

    Keyword arguments:
    avgFunc      --  the function to use to determine the average trace.
                     This function must be robust to nan values.
                     (default=nanmean)
    *traceParams --  see getTrace()

    Returns:
    An (xData, yData, errData) tuple, where errData contains the standard
    error.
    """
    traceLen = traceParams['traceLen']
    mTrace = np.empty((len(dm), traceLen))
    mTrace[:] = np.nan
    i = 0
    for trialDm in dm:
        aTrace = getTrace(trialDm, **traceParams)
        mTrace[i, 0:len(aTrace)] = aTrace
        i += 1
    xData = np.linspace(0, traceLen, traceLen)
    yData = avgFunc(mTrace, axis=0)  # was hardcoded to nanmean, ignoring avgFunc
    errData = nanstd(mTrace, axis=0) / np.sqrt(mTrace.shape[0])
    errData = np.array([errData, errData])
    return xData, yData, errData
def timeseries(iData, zoneMap): ''' Make zone-wise averaging of input data input: 3D matrix(Layers x Width x Height) and map of zones (W x H) output: 2D matrices(L x WH) with mean and std ''' #reshape input cube into 2D matrix r, h, w = iData.shape iData, notNanDataI = cube2flat(iData) #get unique values of labels uniqZones = np.unique(zoneMap) # leave only not-nan uniqZones = uniqZones[~np.isnan(uniqZones)] zoneNum = np.zeros((r, uniqZones.size)) zoneMean = np.zeros((r, uniqZones.size)) zoneStd = np.zeros((r, uniqZones.size)) #in each zone: get all values from input data get not nan data average for i in range(uniqZones.size): zi = uniqZones[i] if not np.isnan(zi): zoneData = iData[:, zoneMap.flat == zi] zoneNum[:, i] = zi zoneMean[:, i] = st.nanmean(zoneData, axis=1) zoneStd[:, i] = st.nanstd(zoneData, axis=1) return zoneMean, zoneStd, zoneNum
def compute_moments(img):
    '''
    Compute the moments of the given image.

    Parameters
    ----------
    img : numpy.ndarray
        2D image.

    Returns
    -------
    mean : float
        The 1st moment.
    variance : float
        The 2nd moment.
    skewness : float
        The 3rd moment.
    kurtosis : float
        The 4th moment.
    '''
    mean = nanmean(img, axis=None)
    variance = nanstd(img, axis=None) ** 2.
    skewness = np.nansum(
        ((img - mean) / np.sqrt(variance)) ** 3.) / np.sum(~np.isnan(img))
    kurtosis = np.nansum(
        ((img - mean) / np.sqrt(variance)) ** 4.) / np.sum(~np.isnan(img)) - 3
    return mean, variance, skewness, kurtosis
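# Quick demonstration of compute_moments on a small synthetic image with a
# missing pixel, assuming nanmean/nanstd are the old scipy.stats helpers (or
# shims) imported elsewhere in this file:
import numpy as np

img = np.random.randn(32, 32)
img[3, 7] = np.nan
mean, variance, skewness, kurtosis = compute_moments(img)
# for standard normal noise these should be near 0, 1, 0, 0
print mean, variance, skewness, kurtosis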
def phot(self): """ builds the table of stars """ import numpy as np from scipy import stats epochs = len(self.objids) stars = len(self.stars) from datasource import DataSource m = np.zeros([epochs, stars]) # objid is specific to a filter so we only need to query the objid wifsip = DataSource(host='pina', database='wifsip', user='******') for star in self.stars: print star, query = """SELECT mag_auto, magerr_auto FROM frames, phot, matched WHERE matched.id like '%s' AND frames.filter like '%s' AND frames.objid = phot.objid AND (matched.objid,matched.star) = (phot.objid,phot.star) AND phot.flags = 0 AND magerr_auto > 0.0;""" % (star, self.filter) result = wifsip.query(query) mags = np.array([s[0] for s in result]) err = np.array([s[1] for s in result]) m = stats.nanmean(mags) s = stats.nanstd(mags) merr = stats.nanmean(err) stderr = stats.nanstd(err) #print mags #print err if len(mags) > 1: print '%4d %.3f %.3f %.3f %.3f' % (len(mags), m, s, merr, stderr), mags = mags[err <= merr + stderr] err = err[err <= merr + stderr] avg = np.average(mags, weights=1. / err) std = np.sqrt(np.average(abs(mags - avg)**2, weights=1. / err)) #std = np.std(mags) print '%4d %.3f %.3f' % (len(mags), avg, std) self.update_star(wifsip, star, avg, std, len(mags)) else: print 'none (%.3f, %.3f)' % (m, s) wifsip.close()
def _normalize(self, arr):
    ''' perform normalization routine on attributes '''
    with warn.catch_warnings():
        warn.simplefilter("ignore")
        for i in xrange(arr.shape[1]):
            arr[:, i] = (arr[:, i] - nanmean(arr[:, i])) / nanstd(arr[:, i])
    arr = np.nan_to_num(arr)
    return arr
def buildtable(self): """ builds the table of stars """ import numpy as np epochs = len(self.objids) stars = len(self.stars) if fileexists('/work2/jwe/NGC2281/' + self.filter + 'array.npy'): m = np.load('/work2/jwe/NGC2281/' + self.filter + 'array.npy') else: from datasource import DataSource from framecal import FrameCal fc = FrameCal(self.filter) m = np.zeros([epochs, stars]) # objid is specific to a filter so we only need to query the objid wifsip = DataSource(host='pina', database='wifsip', user='******') for objid in self.objids: k = self.objids.index(objid) print k, epochs, objid, query = """SELECT matched.id, phot.mag_auto, phot.mag_errauto FROM phot, matched WHERE phot.objid like '%s' AND (matched.objid,matched.star) = (phot.objid,phot.star) AND phot.flags = 0;""" % objid result = wifsip.query(query) starids = [s[0] for s in result] mags = [s[1] for s in result] err = [s[2] for s in result] slope, intercept, _, _, _ = fc.calframe(objid) print len(mags) for starid in starids: i = self.stars.index(starid) m[k, i] = mags[starids.index(starid)] * slope + intercept np.save('/work2/jwe/NGC2281/' + self.filter + 'array.npy', m) wifsip.close() i = np.where(m == 0.0) m[i] = np.nan from scipy import stats # calculate the observed average for the stars avg = stats.nanmean(m, axis=0) for k in range(epochs): print k, epochs, self.objids[k] # calculate the mean of offsets off = stats.nanmedian(m[k, :] - avg) # correct epoch for mean of offsets m[k, :] += off # calculate new corrected means avg = stats.nanmean(m, axis=0) std = stats.nanstd(m, axis=0) for i in range(len(self.stars)): print self.stars[i], avg[i], std[i]
def lineprops(filename,z,abf,ebf,D=32,H0=[70,2],wm=[.27,.01],wv=[.73,.01]): ''' Determine the best fit physical and intrinsic conditions inside the observed galaxy. These are determined through scientific principles and using other radiative transfer codes (not developed by me) ''' c = 3.e5 k = 1.38e16 c1 = c*10**5 freqwe = np.genfromtxt('lines.catalog') lfreq1 = freqwe[:,0] wt1 = freqwe[:,3] linename = np.loadtxt('lines.catalog',dtype=str) fs = np.loadtxt(filename) f = fs[:,0] sp = fs[:,1]*4654*1./D**2*1000 snu = stats.nanstd(sp) nc = np.ceil(2*np.sqrt(2*np.log(1000))*abf[2]/.031) fm = np.argsort((f-abf[0])**2)[:nc] fm = fm[np.argsort(fm)] name11 = linename[:,1] name21 = linename[:,2] wt = [] name1 = [] name2 = [] lfreq = [] for j in range(np.size(wt1)): if wt1[j] == 1: wt = np.append(wt,wt1[j]) name1 = np.append(name1,name11[j]) name2 = np.append(name2,name21[j]) lfreq = np.append(lfreq,lfreq1[j]) ebf[0] = (ebf[0]**2+(.031/np.sqrt(8*np.log(2)))**2)**.5 ebf[2] = (ebf[2]**2+(.031/np.sqrt(8*np.log(2)))**2)**.5 rfreq = lfreq/(1+z) nlines = np.size(abf)/3. bfline = lfreq[np.argsort((abf[0]-rfreq)**2)[0]] zbf = [(bfline/abf[0]-1),(bfline/abf[0]**2*ebf[0])] #print bfline #v = (c*(1+z)**2-1)/(1+(1+z)**2) zs = np.linspace(0,zbf[0],1000) d = c/H0[0]*np.trapz((wm[0]*(1+zs)**3+wv[0])**-.5,zs) D = [d*(1+zbf[0]),0] # eDwm = c/(H0[0])*(1+zbf[0])*np.trapz(((wm[0]+wm[1])*(1+zs)**3+wv[0]-wm[1])**-.5,zs)-D[0] D[1] = D[0]*H0[1]/H0[0] fwhm1 = [2*np.sqrt(2*np.log(2))*abf[2]/abf[0]*c,0] fwhm = [fwhm1[0]-2*np.sqrt(2*np.log(2))*(fwhm1[0]-(fwhm1[0]**2-(.031*c/abf[0])**2)**.5),0] R = c*.031/abf[0] fwhm[0] += -70.*np.sqrt(8.*np.log(2.))*(np.sqrt(1+(R/70.)**2/(8.*np.log(2.)))-1.) fwhm[1] = ((ebf[0]*c/abf[0])**2+(ebf[2]*c/abf[0])**2)**.5 Sco = [integrate.simps(sp[fm],f[fm])*c/abf[0],0] Sco[1] = np.sqrt(3*fwhm[0]*c*.031/abf[0])*snu Lco = [2.350*Sco[0]*(115/abf[0])**2*D[0]**2*(1+zbf[0])**-3,0] #Sco in mJy Lco[1] = Lco[0]*((Sco[1]/Sco[0])**2+(2*D[1]/D[0])**2+(2*ebf[0]/abf[0])**2)**.5 return zbf,fwhm,Sco,D,Lco
def aggregate_ftr_matrix(ftr_matrix):
    sig = []
    for ftr in ftr_matrix:
        median = stats.nanmedian(ftr)
        mean = stats.nanmean(ftr)
        std = stats.nanstd(ftr)
        # Invalid double scalars warning appears here
        skew = stats.skew(ftr) if any(ftr) else 0.0
        kurtosis = stats.kurtosis(ftr)
        sig.extend([median, mean, std, skew, kurtosis])
    return sig
def standardize(X, axis=None):
    r"""Subtracts the data mean and divides by its standard deviation
    along the specified axis. Accepts NaNs."""
    # NOTE: There is an alternative in scipy.stats.mstats.zscore
    mu = nanmean(X, axis=axis)
    sigma = nanstd(X, axis=axis)
    Xr = (X - mu) / sigma
    return Xr, mu, sigma
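# Round-trip sketch for standardize: the returned mu/sigma let you undo the
# transform (again assuming the old scipy.stats nan helpers or shims are in
# scope as nanmean/nanstd):
import numpy as np

X = np.array([1.0, 2.0, np.nan, 4.0])
Xr, mu, sigma = standardize(X)
print Xr                  # zero nanmean, unit nanstd
print Xr * sigma + mu     # recovers X (the NaN stays NaN)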
def fromData(self, y):
    """
    Compute scoring matrix based on estimated covariance matrix of y

    Estimated covariance matrix is given by 1/2 variance of the
    second order differences of y

    INPUT:
      y -- DxN -- N measurements of a time series in D dimensions
    """
    self.y0 = nanmean(y, 1)
    self.M = diag(nanstd(diff(y, n=2, axis=1), axis=1))
    self.S = diag(1 / sqrt(diag(self.M)))
def get3NetworkAvg(data_t, titleName, roiNames, numRuns):
    #Define the streams
    #Ventral=[1, 3, 11, 12, 13, 14]
    #Dorsal=[2, 4, 5, 6, 7, 8, 9, 10]
    #Lateral=[0, 1, 2, 3, 4]
    Lateral = [0, 1, 2, 8, 9]
    Dorsal = [8, 9, 10, 11, 12, 13, 14, 15]
    Ventral = [1, 2, 3, 4, 5, 6]
    print 'Ventral rois: ' + str(roiNames[Ventral])
    print 'Dorsal rois: ' + str(roiNames[Dorsal])
    print 'Early Visual rois: ' + str(roiNames[Lateral])
    # Get network averages (the original passed Ventral to dorsalCoher and
    # Dorsal to ventralCoher; corrected here so names match the ROI lists)
    lateralCoher = getNetworkWithin(data_t, Lateral)
    dorsalCoher = getNetworkWithin(data_t, Dorsal)
    ventralCoher = getNetworkWithin(data_t, Ventral)
    #allMeansWithin=(stats.nanmean(lateralCoher.flat), stats.nanmean(dorsalCoher.flat), stats.nanmean(ventralCoher.flat))
    #allSTDWithin=(stats.nanstd(lateralCoher.flat), stats.nanstd(dorsalCoher.flat), stats.nanstd(ventralCoher.flat))
    allMeansWithin = (stats.nanmean(dorsalCoher.flat),
                      stats.nanmean(ventralCoher.flat))
    allSTDWithin = (stats.nanstd(dorsalCoher.flat),
                    stats.nanstd(ventralCoher.flat))
    latBtwCoher = getNetworkBtw(data_t, Lateral, Ventral + Dorsal)
    dorsBtwCoher = getNetworkBtw(data_t, Dorsal, Ventral)
    ventBtwCoher = getNetworkBtw(data_t, Ventral, Dorsal)
    #allMeansBtw=(stats.nanmean(latBtwCoher), stats.nanmean(dorsBtwCoher), stats.nanmean(ventBtwCoher))
    #allSTDBtw=(stats.nanstd(latBtwCoher), stats.nanstd(dorsBtwCoher), stats.nanstd(ventBtwCoher))
    # Just dorsal versus ventral
    allMeansBtw = (stats.nanmean(dorsBtwCoher), stats.nanmean(ventBtwCoher))
    allSTDBtw = (stats.nanstd(dorsBtwCoher), stats.nanstd(ventBtwCoher))
    # Make bar graph ('sub' is a module-level global here)
    title = titleName + ' by Network for ' + sub + ' for ' + str(numRuns) + ' runs'
    labels = ('Dorsal', 'Ventral')
    makeBarPlots(allMeansWithin, allSTDWithin, allMeansBtw, allSTDBtw,
                 title, labels)
def plot_dist_to_targ(task_entry, reach_trajectories=None, targ_dist=10., plot_all=False, ax=None, target=None, update_rate=60., decoder_rate=10., **kwargs): task_entry = dbfn.lookup_task_entries(task_entry) if reach_trajectories == None: reach_trajectories = task_entry.get_reach_trajectories() if target == None: target = np.array([targ_dist, 0]) trajectories_dist_to_targ = [ map(np.linalg.norm, traj.T - target) for traj in reach_trajectories ] step = update_rate / decoder_rate trajectories_dist_to_targ = map(lambda x: x[::step], trajectories_dist_to_targ) max_len = np.max([len(traj) for traj in trajectories_dist_to_targ]) n_trials = len(trajectories_dist_to_targ) # TODO use masked arrays data = np.ones([n_trials, max_len]) * np.nan for k, traj in enumerate(trajectories_dist_to_targ): data[k, :len(traj)] = traj from scipy.stats import nanmean, nanstd mean_dist_to_targ = np.array([nanmean(data[:, k]) for k in range(max_len)]) std_dist_to_targ = np.array([nanstd(data[:, k]) for k in range(max_len)]) if ax == None: plt.figure() ax = plt.subplot(111) # time vector, assuming original screen update rate of 60 Hz time = np.arange(max_len) * 0.1 if plot_all: for dist_to_targ in trajectories_dist_to_targ: ax.plot(dist_to_targ, **kwargs) else: ax.plot(time, mean_dist_to_targ, **kwargs) import plotutil #plotutil.set_ylim(ax, [0, targ_dist]) plotutil.ylabel(ax, 'Distance to target') plotutil.xlabel(ax, 'Time (s)') plt.draw()
def plot_means(dataset): min_age = min(dataset.ages) max_age = max(dataset.ages) min_expression = np.nanmin(dataset.expression.flat) max_expression = np.nanmax(dataset.expression.flat) center = np.empty(dataset.ages.shape) std_plus = np.empty(dataset.ages.shape) std_minus = np.empty(dataset.ages.shape) for i, age in enumerate(dataset.ages): a = dataset.expression[i, :, :].flat c = nanmean(a) s = nanstd(a) center[i] = c std_plus[i] = c + s std_minus[i] = c - s fig = plt.figure() ax = fig.add_axes([0.08, 0.15, 0.85, 0.8]) ax.set_ylabel('expression level', fontsize=cfg.fontsize) ax.set_xlabel('age', fontsize=cfg.fontsize) ax.set_title('Mean expression across all genes - {}'.format(dataset.name), fontsize=cfg.fontsize) # set the development stages as x labels stages = [stage.scaled(scaler) for stage in dev_stages] ax.set_xticks([stage.central_age for stage in stages]) ax.set_xticklabels([stage.short_name for stage in stages], fontsize=cfg.xtick_fontsize, fontstretch='condensed', rotation=90) ax.set_xlim([min_age, max_age]) # mark birth time with a vertical line ymin, ymax = ax.get_ylim() birth_age = scaler.scale(0) ax.plot([birth_age, birth_age], [ymin, ymax], '--', color='0.85') ax.plot([min_age, max_age], [min_expression, min_expression], '--g') ax.plot([min_age, max_age], [max_expression, max_expression], '--g') ax.plot(dataset.ages, center, 'bx') ax.plot(dataset.ages, std_plus, 'g-') ax.plot(dataset.ages, std_minus, 'g-') save_figure(fig, 'mean-expression-{}.png'.format(dataset.name), under_results=True)
def MeanWithConfidenceInterval(Y, confidence=0.95):
    """
    Use the fact that (mean(Y) - mu) / (std(Y)/sqrt(n)) follows a Student
    t distribution with n-1 degrees of freedom.

    Returns:
        2-tuple (mean, symmetric confidence interval size).
    """
    n = len(Y)
    Y_bar = st.nanmean(Y)
    # According to the Student t distribution with n-1 degrees of freedom,
    # find the position where the CDF is 0.975 (assuming we want a confidence
    # of 0.95). The lower part of the tail will account for the other 0.025
    # chance.
    t = st.t.ppf((confidence + 1.0) / 2.0, n - 1)
    # use the unbiased estimator: sqrt(sum((y - mean(y))^2) / (n - 1))
    SD = st.nanstd(Y, bias=False)
    SE = SD / np.sqrt(len(Y))
    return Y_bar, t * SE
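# Worked example for the confidence-interval helper above (the sample values
# are made up); with n=5 and confidence 0.95, st.t.ppf(0.975, 4) is about
# 2.776:
import numpy as np
from scipy import stats as st

Y = [9.8, 10.1, 10.3, 9.9, 10.4]
mean, ci = MeanWithConfidenceInterval(Y)
print "%.2f +/- %.2f" % (mean, ci)
# the true mean lies in [mean - ci, mean + ci] with ~95% confidence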
def updateLabelsAndFit(self, bufferA, bufferB): self.plotAttributes["curve"].setData(bufferA, bufferB) try: if self.ui.checkBoxAutoscale.isChecked(): self.setPlotRanges(bufferA, bufferB) minBufferA = nanmin(bufferA) minBufferB = nanmin(bufferB) maxBufferA = nanmax(bufferA) maxBufferB = nanmax(bufferB) if self.ui.checkBoxShowAve.isChecked(): rtbsaUtils.setPosAndText(self.text["avg"], nanmean(bufferB), minBufferA, minBufferB, 'AVG: ') if self.ui.checkBoxShowStdDev.isChecked(): xPos = (minBufferA + (minBufferA + maxBufferA) / 2) / 2 rtbsaUtils.setPosAndText(self.text["std"], nanstd(bufferB), xPos, minBufferB, 'STD: ') if self.ui.checkBoxCorrCoeff.isChecked(): correlation = corrcoef(bufferA, bufferB) rtbsaUtils.setPosAndText(self.text["corr"], correlation.item(1), minBufferA, maxBufferB, "Corr. Coefficient: ") if self.ui.checkBoxLinFit.isChecked(): self.text["slope"].setPos((minBufferA + maxBufferA) / 2, minBufferB) self.getLinearFit(bufferA, bufferB, True) elif self.ui.checkBoxPolyFit.isChecked(): self.text["slope"].setPos((minBufferA + maxBufferA) / 2, minBufferB) self.getPolynomialFit(bufferA, bufferB, True) except ValueError: print "Error updating plot range"
def moving_average(feedbacks, slot_n, prediction_length, mmc):
    past_delays_fitted = numpy.asarray(feedbacks)
    col_mean = stats.nanmean(past_delays_fitted, axis=0)
    col_std = stats.nanstd(past_delays_fitted, axis=0)

    # impute missing feedback values with the per-column mean
    inds = numpy.where(numpy.isnan(past_delays_fitted))
    past_delays_fitted[inds] = numpy.take(col_mean, inds[1])
    wifi_delays = past_delays_fitted[:, 0]
    lte_delays = past_delays_fitted[:, 1]

    if mmc:
        # draw the forward predictions from a Gaussian fitted to the history
        forward_predicted_delays_mmc = numpy.c_[
            numpy.random.normal(col_mean[0], col_std[0], prediction_length),
            numpy.random.normal(col_mean[1], col_std[1], prediction_length)]
        # truncate negative samples
        forward_predicted_delays_mmc[forward_predicted_delays_mmc < 0] = 0
        predicted_ma = numpy.r_[past_delays_fitted,
                                forward_predicted_delays_mmc]
        return col_mean, col_std, predicted_ma
    else:
        predicted_ma = numpy.r_[past_delays_fitted,
                                numpy.zeros((prediction_length, 2))]
        # weighted moving average: sample i of n gets weight 1/(n+1-i), so
        # the most recent sample carries the largest weight (1/2)
        for pl in range(past_delays_fitted.shape[0],
                        past_delays_fitted.shape[0] + prediction_length):
            predicted_ma[pl, 0] = numpy.divide(
                numpy.sum(numpy.divide(wifi_delays,
                                       range(wifi_delays.shape[0] + 1, 1, -1),
                                       dtype='float_')),
                numpy.sum(numpy.divide(1,
                                       range(wifi_delays.shape[0] + 1, 1, -1),
                                       dtype='float_')),
                dtype='float_')
            predicted_ma[pl, 1] = numpy.divide(
                numpy.sum(numpy.divide(lte_delays,
                                       range(lte_delays.shape[0] + 1, 1, -1),
                                       dtype='float_')),
                numpy.sum(numpy.divide(1,
                                       range(lte_delays.shape[0] + 1, 1, -1),
                                       dtype='float_')),
                dtype='float_')
        return predicted_ma
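# A quick numeric check of the weighting scheme above (values illustrative):
# with n = 3 samples, the divisors are [4, 3, 2], so the weights are
# [1/4, 1/3, 1/2] and the prediction is
# (x0/4 + x1/3 + x2/2) / (1/4 + 1/3 + 1/2) ~= 4.31 for x = [8, 5, 2].
def _weighted_ma_demo():
    import numpy
    x = numpy.array([8.0, 5.0, 2.0])            # hypothetical delay history
    divisors = numpy.arange(len(x) + 1, 1, -1)  # [4, 3, 2]
    prediction = numpy.sum(x / divisors) / numpy.sum(1.0 / divisors)
    print prediction                            # ~= 4.31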
def standardize_col(dat, meanonly=False):
    '''
    Mean impute each column of an array.
    '''
    colmean = st.nanmean(dat)
    # NB: the original used `if ~meanonly:`, which is bitwise NOT on a bool
    # and is always truthy; `not` gives the intended behavior
    if not meanonly:
        colstd = st.nanstd(dat)
    else:
        colstd = None
    ncol = dat.shape[1]
    nmissing = sp.zeros((ncol))
    datimp = sp.empty_like(dat)
    datimp[:] = dat
    for c in sp.arange(0, ncol):
        # replace NaNs with the column mean, then center the column
        datimp[sp.isnan(datimp[:, c]), c] = colmean[c]
        datimp[:, c] = datimp[:, c] - colmean[c]
        if not meanonly:
            if colstd[c] > 1e-6:
                datimp[:, c] = datimp[:, c] / colstd[c]
            else:
                print "warning: colstd=" + str(colstd[c]) + " during normalization"
        nmissing[c] = float(sp.isnan(dat[:, c]).sum())
    fracmissing = nmissing / dat.shape[0]
    return datimp, fracmissing
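# A minimal usage sketch for standardize_col, assuming the module-level
# aliases used above (import scipy as sp; import scipy.stats as st, with the
# legacy st.nanmean/st.nanstd API from SciPy < 0.18).
def _standardize_col_demo():
    import numpy as np
    dat = np.array([[1.0, 10.0],
                    [2.0, np.nan],  # NaN is imputed with the column mean
                    [3.0, 30.0]])
    datimp, fracmissing = standardize_col(dat)
    print datimp        # columns are mean-imputed, centered, and scaled
    print fracmissing   # fraction of missing entries per column: [0, 1/3]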
def meanstd(x, axis=None):
    '''Return the NaN-ignoring mean and standard deviation of x.'''
    return stats.nanmean(x, axis), stats.nanstd(x, axis)
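# Note: scipy.stats.nanmean/nanstd were deprecated in SciPy 0.15 and removed
# in 0.18. A sketch of the same helper on top of numpy's equivalents
# (NumPy >= 1.8); ddof=1 matches scipy.stats.nanstd's default bias=False
# (normalization by n-1).
def meanstd_np(x, axis=None):
    import numpy as np
    return np.nanmean(x, axis=axis), np.nanstd(x, axis=axis, ddof=1)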
def main(argv):
    # default settings
    markerSize = 16
    markerSize2 = 16
    markerColor = 'g'
    markerColor2 = 'red'
    lineWidth = 2
    fontSize = 16
    unit = 'cm'
    Save_timeseries = 'no'
    dispTsFig = 'yes'
    dispVelFig = 'yes'
    dispContour = 'only'
    contour_step = 200
    smoothContour = 'no'
    radius = 0
    edgeWidth = 1.5
    fig_dpi = 300

    if len(sys.argv) > 2:
        try:
            opts, args = getopt.getopt(argv, "f:F:v:a:b:s:m:c:w:u:l:h:S:D:C:V:t:T:d:r:x:y:P:p:")
        except getopt.GetoptError:
            Usage()
            sys.exit(1)
        for opt, arg in opts:
            if opt == '-f':
                timeSeriesFile = arg
            elif opt == '-F':
                timeSeriesFile_2 = arg
            elif opt == '-v':
                velocityFile = arg
            elif opt == '-a':
                vmin = float(arg)
            elif opt == '-b':
                vmax = float(arg)
            elif opt == '-s':
                fontSize = int(arg)
            elif opt == '-m':
                markerSize = int(arg); markerSize2 = int(arg)
            elif opt == '-S':
                Save_timeseries = arg
            elif opt == '-c':
                markerColor = arg
            elif opt == '-w':
                lineWidth = int(arg)
            elif opt == '-u':
                unit = arg
            elif opt == '-l':
                lbound = float(arg)
            elif opt == '-h':
                hbound = float(arg)
            elif opt == '-D':
                demFile = arg
            elif opt == '-C':
                dispContour = arg
            elif opt == '-V':
                contour_step = float(arg)
            elif opt == '-t':
                minDate = arg
            elif opt == '-T':
                maxDate = arg
            elif opt == '-d':
                datesNot2show = arg.split()
            elif opt == '-r':
                radius = abs(int(arg))
            elif opt == '-x':
                xsub = [int(i) for i in arg.split(':')]; xsub.sort(); dispVelFig = 'no'
            elif opt == '-y':
                ysub = [int(i) for i in arg.split(':')]; ysub.sort(); dispVelFig = 'no'
            elif opt == '-P':
                dispTsFig = arg
            elif opt == '-p':
                dispVelFig = arg
    elif len(sys.argv) == 2:
        if argv[0] == '-h':
            Usage(); sys.exit(1)
        elif os.path.isfile(argv[0]):
            timeSeriesFile = argv[0]
            h5timeseries = h5py.File(timeSeriesFile)
            if not 'timeseries' in h5timeseries.keys():
                print 'ERROR'
                Usage(); sys.exit(1)
        else:
            Usage(); sys.exit(1)
    elif len(sys.argv) < 2:
        Usage(); sys.exit(1)

    if unit in ('m', 'M'):
        unitFac = 1
    elif unit in ('cm', 'Cm', 'CM'):
        unitFac = 100
    elif unit in ('mm', 'Mm', 'MM', 'mM'):
        unitFac = 1000
    else:
        print 'Warning:'
        print 'wrong unit input!'
        print 'cm is considered to display the displacement'
        unitFac = 100  # fall back to cm, as announced above, so unitFac is always defined

    ##############################################################
    # Read time series file info
    if not os.path.isfile(timeSeriesFile):
        Usage(); sys.exit(1)
    h5timeseries = h5py.File(timeSeriesFile)
    if not 'timeseries' in h5timeseries.keys():
        Usage(); sys.exit(1)
    dateList1 = h5timeseries['timeseries'].keys()

    ##############################################################
    # Dates to show time series plot
    import matplotlib.dates as mdates
    years = mdates.YearLocator()    # every year
    months = mdates.MonthLocator()  # every month
    yearsFmt = mdates.DateFormatter('%Y')

    print '*******************'
    print 'All dates existed:'
    print dateList1
    print '*******************'
    try:
        datesNot2show  # NameError if -d was not given
        print 'dates not to show: ' + str(datesNot2show)
    except:
        datesNot2show = []
    try:
        minDate
        minDateyy = yyyymmdd2years(minDate)
        print 'minimum date: ' + minDate
        for date in dateList1:
            yy = yyyymmdd2years(date)
            if yy < minDateyy:
                datesNot2show.append(date)
    except:
        pass
    try:
        maxDate
        maxDateyy = yyyymmdd2years(maxDate)
        print 'maximum date: ' + maxDate
        for date in dateList1:
            yy = yyyymmdd2years(date)
            if yy > maxDateyy:
                datesNot2show.append(date)
    except:
        pass
    try:
        dateList = []
        for date in dateList1:
            if date not in datesNot2show:
                dateList.append(date)
        print '--------------------------------------------'
        print 'dates used to show time series displacements:'
        print dateList
        print '--------------------------------------------'
    except:
        dateList = dateList1
        print 'using all dates to show time series displacement'

    ###################################################################
    # Date info
    dateIndex = {}
    for ni in range(len(dateList)):
        dateIndex[dateList[ni]] = ni
    tbase = []
    d1 = datetime.datetime(*time.strptime(dateList[0], "%Y%m%d")[0:5])
    for ni in range(len(dateList)):
        d2 = datetime.datetime(*time.strptime(dateList[ni], "%Y%m%d")[0:5])
        diff = d2 - d1
        tbase.append(diff.days)
    dates = []
    for ni in range(len(dateList)):
        d = datetime.datetime(*time.strptime(dateList[ni], "%Y%m%d")[0:5])
        dates.append(d)
    datevector = []
    for i in range(len(dates)):
        datevector.append(np.float(dates[i].year) +
                          np.float(dates[i].month - 1) / 12 +
                          np.float(dates[i].day - 1) / 365)
    datevector2 = [round(i, 2) for i in datevector]

    ###########################################
    # Plot Fig 1 - Velocity / last epoch of time series / DEM
    import matplotlib.pyplot as plt
    if dispVelFig in ('yes', 'Yes', 'y', 'Y', 'YES'):
        fig = plt.figure()
        ax = fig.add_subplot(111)
        try:
            velocityFile
            h5file = h5py.File(velocityFile, 'r')
            k = h5file.keys()
            dset = h5file[k[0]].get(k[0])
            print 'display: ' + k[0]
        except:
            dset = h5timeseries['timeseries'].get(h5timeseries['timeseries'].keys()[-1])
            print 'display: last epoch of timeseries'

        # DEM/contour option
        try:
            demFile
            import _readfile as readfile
            if os.path.basename(demFile).split('.')[1] == 'hgt':
                amp, dem, demRsc = readfile.read_float32(demFile)
            elif os.path.basename(demFile).split('.')[1] == 'dem':
                dem, demRsc = readfile.read_dem(demFile)
            if dispContour in ('no', 'No', 'n', 'N', 'NO', 'yes', 'Yes', 'y', 'Y', 'YES'):
                print 'show DEM as basemap'
                cmap_dem = plt.get_cmap('gray')
                import _pysar_utilities as ut
                plt.imshow(ut.hillshade(dem, 50.0), cmap=cmap_dem)
            if dispContour in ('only', 'Only', 'o', 'O', 'ONLY', 'yes', 'Yes', 'y', 'Y', 'YES'):
                print 'show contour'
                if smoothContour in ('yes', 'Yes', 'y', 'Y', 'YES'):
                    import scipy.ndimage as ndimage
                    dem = ndimage.gaussian_filter(dem, sigma=10.0, order=0)
                contour_sequence = np.arange(-6000, 9000, contour_step)
                plt.contour(dem, contour_sequence, origin='lower',
                            colors='black', alpha=0.5)
        except:
            print 'No DEM file'

        try:
            img = ax.imshow(dset, vmin=vmin, vmax=vmax)
        except:
            img = ax.imshow(dset)
        # needed to draw rectangles around points selected on the velocity figure
        import matplotlib.patches as patches

    ##########################################
    # Plot Fig 2 - Time series plot
    import scipy.stats as stats
    fig2 = plt.figure(2)
    ax2 = fig2.add_subplot(111)
    try:
        timeSeriesFile_2
        h5timeseries_2 = h5py.File(timeSeriesFile_2)
        print 'plot 2nd time series'
    except:
        pass

    ########### Plot Time Series with x/y ##########
    try:
        xsub
        ysub
        try:
            xmin = xsub[0]; xmax = xsub[1] + 1
            print 'x=' + str(xsub[0]) + ':' + str(xsub[1])
        except:
            xmin = xsub[0] - radius; xmax = xsub[0] + radius + 1
            print 'x=' + str(xsub[0]) + '+/-' + str(radius)
        try:
            ymin = ysub[0]; ymax = ysub[1] + 1
            print 'y=' + str(ysub[0]) + ':' + str(ysub[1])
        except:
            ymin = ysub[0] - radius; ymax = ysub[0] + radius + 1
            print 'y=' + str(ysub[0]) + '+/-' + str(radius)
        try:
            fig
            rectSelect = patches.Rectangle((xmin, ymin), radius * 2 + 1,
                                           radius * 2 + 1, fill=False,
                                           lw=edgeWidth)
            ax.add_patch(rectSelect)
        except:
            pass

        Dis = []
        for date in dateList:
            Dis.append(h5timeseries['timeseries'].get(date)[ymin:ymax, xmin:xmax])
        Dis0 = array(Dis)
        dis = Dis0 * unitFac
        dis = reshape(dis, (len(dateList), -1))
        dis_mean = stats.nanmean(dis, 1)
        # a single pixel has no spread to report
        if (xmax - xmin) * (ymax - ymin) == 1:
            dis_std = [0] * len(dateList)
        else:
            dis_std = stats.nanstd(dis, 1)
        (_, caps, _) = ax2.errorbar(dates, dis_mean, yerr=dis_std, fmt='-ko',
                                    ms=markerSize, lw=lineWidth, alpha=1,
                                    mfc=markerColor, elinewidth=edgeWidth,
                                    ecolor='black', capsize=markerSize * 0.5)
        for cap in caps:
            cap.set_markeredgewidth(edgeWidth)
        print dis_mean

        # x axis format
        ax2.fmt_xdata = mdates.DateFormatter('%Y-%m-%d %H:%M:%S')
        if unitFac == 100:
            ax2.set_ylabel('Displacement [cm]', fontsize=fontSize)
        elif unitFac == 1000:
            ax2.set_ylabel('Displacement [mm]', fontsize=fontSize)
        else:
            ax2.set_ylabel('Displacement [m]', fontsize=fontSize)
        ax2.set_xlabel('Time [years]', fontsize=fontSize)
        ax2.set_title('x=' + str(xmin) + ':' + str(xmax - 1) +
                      ', y=' + str(ymin) + ':' + str(ymax - 1))
        ax2.xaxis.set_major_locator(years)
        ax2.xaxis.set_major_formatter(yearsFmt)
        ax2.xaxis.set_minor_locator(months)
        datemin = datetime.date(int(datevector[0]), 1, 1)
        datemax = datetime.date(int(datevector[-1]) + 1, 1, 1)
        ax2.set_xlim(datemin, datemax)

        # y axis format
        try:
            lbound
            hbound
            ax2.set_ylim(lbound, hbound)
        except:
            ax2.set_ylim(nanmin(dis_mean - dis_std) - 0.4 * abs(nanmin(dis_mean)),
                         nanmax(dis_mean + dis_std) + 0.4 * abs(nanmax(dis_mean)))
        for tick in ax2.xaxis.get_major_ticks():
            tick.label.set_fontsize(fontSize)
        for tick in ax2.yaxis.get_major_ticks():
            tick.label.set_fontsize(fontSize)
        #fig2.autofmt_xdate()  # adjust x overlap by rotating; may enable again

        if Save_timeseries in ('yes', 'Yes', 'Y', 'y', 'YES'):
            import scipy.io as sio
            Delay = {}
            Delay['displacement'] = Dis0
            Delay['unit'] = 'm'
            Delay['time'] = datevector
            tsNameBase = ('ts_x' + str(xmin) + '_' + str(xmax - 1) +
                          'y' + str(ymin) + '_' + str(ymax - 1))
            sio.savemat(tsNameBase + '.mat', {'displacement': Delay})
            print 'saved data to ' + tsNameBase + '.mat'
            plt.savefig(tsNameBase + '.pdf', dpi=fig_dpi)
            print 'saved plot to ' + tsNameBase + '.pdf'
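# A hypothetical invocation of this viewer, based on the getopt string above
# (the script and file names are placeholders, not confirmed by the source):
# plot the mean displacement over the box x=295:305, y=395:405 in cm and save
# the time series data and figure.
#
#   python tsviewer.py -f timeseries.h5 -v velocity.h5 -x 295:305 -y 395:405 -u cm -S yes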