예제 #1
0
파일: batch.py 프로젝트: kenjsc/CAMpping
def all_means(cp):
	"""Function accepts a cp object and returns a mean and standard deviation of all
	parameters for all fingerprints in this form:
	list of (feature, mu, std) for each feature
	"""
	data = []
	for parameter_pair in zip(*[finger.items() for finger in cp.fingerprints()]):
		parameter, values = zip(*parameter_pair)
		print parameter, values
		print st.nanmean(values), st.nanstd(values)
		data.append([parameter[0], st.nanmean(values), st.nanstd(values)])
	return data
예제 #2
0
파일: cinfo.py 프로젝트: StephGagne/SAMAC
def cinfo(CL,param):
    """ Summarise one parameter of a cloud at its base, its top and inside the cloud (all given in the units of the parameter).

        The column of CL.data whose title in CL.dttl equals `param` is selected by altitude (the column titled 'altitude') for every entry of CL.props["height"]. Each entry provides four altitude bounds, referred to below as h0..h3. Samples are selected by altitude only, i.e. pooled over the entire cloud time (the original note states this is not the case for vpinfo(CL,param)).

        Returns a dictionary H of lists with one element per entry of CL.props["height"]:
        H["bottom"]: median of param for h0 <= altitude <= h1 (cloud base)
        H["top"]: median of param for h2 <= altitude <= h3 (cloud top)
        H["mean"]: mean of param for h1 <= altitude <= h2 (in cloud)
        H["median"]: median of param for h1 <= altitude <= h2 (in cloud)
        H["stdev"]: standard deviation of param for h1 <= altitude <= h2 (in cloud)
        H["delta"]: H["bottom"] minus H["top"]
        H["slope"]: delta divided by (mean(h2,h3) - mean(h0,h1))
        H["units"]: CL.dunit entry of the parameter

        If anything raises while the entries are processed, a message is printed and H is returned with whatever had been appended up to that point.
        The original note says the result can be accessed as e.g. CloudObj.cinfo["bottom"] or CloudObj.cinfo (dictionary). """
    H=dict()
    H["bottom"]=list(); H["top"]=list(); H["mean"]=list(); H["median"]=list(); H["stdev"]=list(); H["delta"]=list(); H["slope"]=list(); H["units"]=list(); 
    # Row indices of the altitude column and of the requested parameter; an IndexError is raised here (outside the try) if a title is missing.
    alt=[i for i,x in enumerate(CL.dttl) if x == 'altitude'][0]
    T=[i for i,x in enumerate(CL.dttl) if x == param][0]
    try:
        for i in range(len(CL.props["height"])):
            # in-cloud samples: altitude between the 2nd and 3rd height bounds
            ix=nonzero((CL.data[alt]>=CL.props["height"][i][1])*(CL.data[alt]<=CL.props["height"][i][2]))
            # cloud base: median between the 1st and 2nd bounds
            H["bottom"].append(float(st.nanmedian(CL.data[T][nonzero((CL.data[alt]>=CL.props["height"][i][0])*(CL.data[alt]<=CL.props["height"][i][1]))])))
            # cloud top: median between the 3rd and 4th bounds
            H["top"].append(float(st.nanmedian(CL.data[T][nonzero((CL.data[alt]>=CL.props["height"][i][2])*(CL.data[alt]<=CL.props["height"][i][3]))])))
            H["mean"].append(float(st.nanmean(CL.data[T][ix])))
            H["median"].append(float(st.nanmedian(CL.data[T][ix])))
            H["stdev"].append(float(st.nanstd(CL.data[T][ix])))
            H["delta"].append(H["bottom"][i]-H["top"][i])
            # thickness is measured between the midpoints of the top layer and of the base layer
            H["slope"].append(H["delta"][i]/(np.mean([CL.props["height"][i][2], CL.props["height"][i][3]])-np.mean([CL.props["height"][i][0], CL.props["height"][i][1]])))     # units/meter
            H["units"].append(CL.dunit[T])
            del ix
    # NOTE(review): the bare except reports every failure as "heights not defined", not only a missing CL.props["height"].
    except: print("[cinfo] Height properties must be defined first using the defheight method.")
    return H
예제 #3
0
	def collapse(self, keys, vName):

		"""
		desc:
			Group the data by one or more keys and summarise a dependent
			variable within each group.

		arguments:
			keys:
				desc:	A key or list of keys to group the data by.
				type:	[list, str, unicode]
			vName:
				desc:	Name of the dependent variable to summarise.
						Alternatively a function, which is called with each
						group; its result is stored in the 'mean' column and
						'median', 'std', 'se' and '95ci' are set to NaN.
				type:	[str, unicode, function]

		returns:
			desc:	A DataMatrix with a header row followed by one row per
					group: the key values (taken from the first entry of the
					group), then mean, median, std, se, 95ci and count.
			type:	DataMatrix
		"""

		if isinstance(keys, basestring):
			keys = [keys]
		useFunction = isinstance(vName, types.FunctionType)

		rows = [keys + ['mean', 'median', 'std', 'se', '95ci', 'count']]
		for group in self.group(keys):
			row = [group[key][0] for key in keys]
			if useFunction:
				row += [vName(group), np.nan, np.nan, np.nan, np.nan, \
					len(group)]
			else:
				values = group[vName]
				sd = nanstd(values)
				# The denominator is the full size of the column, NaN
				# entries included.
				rootN = np.sqrt(values.size)
				row += [nanmean(values), nanmedian(values), sd, sd/rootN, \
					1.96*sd/rootN, values.size]
			rows.append(row)
		return DataMatrix(rows)
예제 #4
0
파일: graph.py 프로젝트: msullivan/he-man
def produce_stats(data):
    """Aggregate request rates per (server, size, conns) configuration.

    `data` is an iterable of 6-tuples (server, size, conns, it, time, rate).
    Every rate is registered under its (server, size, conns) key through
    map_add (presumably collecting the rates of a key in a list -- TODO
    confirm against map_add).

    Returns a list of 5-tuples (server, size, conns, mean, stddev), computed
    with stats.nanmean / stats.nanstd, in the iteration order of the
    intermediate dictionary.
    """
    grouped = {}
    for record in data:
        server, size, conns, _iteration, _elapsed, rate = record
        map_add(grouped, (server, size, conns), rate)
    return [key + (stats.nanmean(rates), stats.nanstd(rates))
            for key, rates in grouped.items()]
예제 #5
0
파일: pdf.py 프로젝트: toejamhoney/nabu
 def aggregate_ftr_matrix(self, ftr_matrix):
     """Flatten a feature matrix into a list of per-row summary statistics.

     For every row of `ftr_matrix`, five values are appended in this order:
     median, mean, standard deviation, skew and kurtosis. Skew is reported
     as 0.0 when the row has no truthy element (e.g. all zeros).
     """
     signature = []
     for feature in ftr_matrix:
         center = stats.nanmedian(feature)
         average = stats.nanmean(feature)
         spread = stats.nanstd(feature)
         # Invalid double scalars warning appears here
         if any(feature):
             skewness = stats.skew(feature)
         else:
             skewness = 0.0
         peakedness = stats.kurtosis(feature)
         signature += [center, average, spread, skewness, peakedness]
     return signature
예제 #6
0
파일: pdf.py 프로젝트: toejamhoney/nabu
 def aggregate_ftr_matrix(self, ftr_matrix):
     """Flatten a feature matrix into a list of per-row summary statistics.

     For every row of `ftr_matrix`, five values are appended in this order:
     median, mean, standard deviation, skew and kurtosis. Skew is reported
     as 0.0 when the row has no truthy element (e.g. all zeros).
     """
     signature = []
     for feature in ftr_matrix:
         center = stats.nanmedian(feature)
         average = stats.nanmean(feature)
         spread = stats.nanstd(feature)
         # Invalid double scalars warning appears here
         if any(feature):
             skewness = stats.skew(feature)
         else:
             skewness = 0.0
         peakedness = stats.kurtosis(feature)
         signature += [center, average, spread, skewness, peakedness]
     return signature
예제 #7
0
파일: batch.py 프로젝트: kenjsc/CAMpping
def dmso_means(cp):
	"""Return the mean and standard deviation of every feature over the dmso fingerprints.

	The fingerprints come from ``get_dmsos(cp)``; their ``items()`` are lined
	up position by position, so this assumes every fingerprint lists its
	features in the same order (TODO confirm against the fingerprint type).

	Returns a list of ``(feature, mu, std)`` tuples, one per feature
	position; the feature name is taken from the first fingerprint and the
	statistics come from ``st.nanmean`` / ``st.nanstd``.
	"""
	item_lists = [dmso.items() for dmso in get_dmsos(cp)]
	# Each element of zip(*item_lists) holds one (name, value) pair per
	# fingerprint; the inner zip splits it into all names and all values.
	split_columns = (zip(*column) for column in zip(*item_lists))
	return [
		(names[0], st.nanmean(readings), st.nanstd(readings))
		for names, readings in split_columns
	]
예제 #8
0
def run_stats(x,n):
    """run_stats(x,n). Calculates and returns the running mean, median, standard deviation, and median absolute deviation (MAD). This function handles NaNs and masked values (masked arrays) by ignoring them.
    x (input) is the array on which the running statistics are calculated. It must be a vector: either 1D, or 2D with a single row or a single column; any other shape raises ValueError. The input is deep-copied, so the caller's array is not modified.
    n is the number of points taken in the running statistics window.
    Returns the list [ave, med, stde, mad]; each entry is computed along axis 1 of an internal window matrix with one row per element of x and n columns."""
    x=copy.deepcopy(x)
    # Make sure x has a mask attribute: anything without one is wrapped in an unmasked masked array.
    try: x.mask
    except: 
        x=np.ma.array(x,mask=False)

    if len(np.shape(x))>2: raise ValueError("The array provided has more than 2 dimensions, at most 1 or 2 dimensions can be handled.")
    # A 1D shape cannot be unpacked into two values; in that case treat it as a single column.
    try: [ro,co]=np.shape(x)
    except: ro=np.shape(x)[0]; co=1
    if ro==1 or co==1: 
        ro=max(ro,co)
        x=x.reshape(ro,)
    else: raise ValueError("The array must be a vector (one column or row)")
    # initializing matrix
    # M starts as all NaN; row i is filled below with the window around x[i], and positions falling outside x stay NaN.
    M=ones([ro,n])*NaN;
    M=ma.asanyarray(M)
    
    # building matrix
    if n%2==1:       # if n is odd
        # columns 0 .. int(n/2)-1: x shifted forward by j, i.e. M[i,posi]=x[i+j] for j=int(n/2)..1
        for j in range(int(n/2),0,-1):
            posi=int(n/2)-j       # current position
            M[0:ro-j,posi]=x[j:]
        # columns int(n/2) .. n-1: x shifted backward by j-1, i.e. M[i,posi]=x[i-(j-1)]; row i therefore spans x[i-int(n/2)] .. x[i+int(n/2)]
        for j in range(1,2+int(n/2),1):
            posi=int(n/2)+j-1;
            M[j-1:,posi]=x[0:(ro+1)-j]
    elif n%2==0:        # if n is even
        # NOTE(review): n/2 is passed to range() and used as an index, so this branch relies on integer division (Python 2 semantics for int n).
        # columns 0 .. n/2-1: M[i,posi]=x[i+j] for j=n/2..1
        for j in range(n/2,0,-1):
            posi=n/2-j
            M[0:ro-j,posi]=x[j:]
        # columns n/2 .. n-1: M[i,posi]=x[i-(j-1)]; row i spans x[i-n/2+1] .. x[i+n/2], one more point ahead than behind
        for j in range(1,n/2+1):
            posi=n/2+j-1;
            M[j-1:,posi]=x[0:(ro+1)-j]
    # reached only when n%2 is neither 1 nor 0
    else: print("Well, that's pretty weird. Are you sure n is an integer?")  
    
    # Write NaN into the underlying data wherever M is masked; presumably so that the nan-aware statistics below skip masked samples -- TODO confirm.
    M.data[M.mask]=nan
    ave=st.nanmean(M, axis=1);
    med=st.nanmedian(M, axis=1);
    stde=st.nanstd(M, axis=1);
    mad=medabsdev(M,axis=1)
    return [ave, med, stde, mad]
예제 #9
0
def compute_average(nh=10, lr_num=10, lr_denum=1000, prefix='rbm', smoothing=True):
    """Average the training log-likelihood curves of all matching jobs.

    Jobs are located by grepping, below the current directory, for
    'orig.conf' files containing both 'lr_num = <lr_num>' and
    'lr_denum = <lr_denum>'. The second path component of each match is used
    as the job directory, and '<job>/<prefix>_train_callback.hdf5' is read
    from it. Jobs without that file contribute an all-NaN row.

    Parameters:
        nh: only used in the printed banner.
        lr_num, lr_denum: integers substituted into the grep patterns.
        prefix: file-name prefix of the HDF5 results file.
        smoothing: if True, keep only the entries whose 'var_logz' is below
            50; otherwise copy every entry of the curve.

    Returns:
        [xmean, ymean, ystd]: the NaN-ignoring column-wise mean of 'n', mean
        of 'train_ll' and standard deviation of 'train_ll' (NaN standard
        deviations replaced by 0), restricted to the columns where xmean is
        not NaN. At most 20 points per curve are stored.
    """
    cmd = "grep -rl --include='orig.conf' 'lr_num = %i$' . |" % lr_num +\
          "xargs grep 'lr_denum = %i$' " % lr_denum
    print(cmd)

    # Run the search a single time and keep its output; the number of matches
    # is then consistent with the rows filled below and the pipe is closed.
    p = os.popen(cmd)
    try:
        matches = p.readlines()
    finally:
        p.close()
    numseeds = len(matches)

    x = numpy.ones((numseeds, 20)) * numpy.nan
    y = numpy.ones((numseeds, 20)) * numpy.nan

    for i, match in enumerate(matches):

        jid = match.split('/')[1]
        rfname = '%s/%s_train_callback.hdf5' % (jid, prefix)
        if not os.path.exists(rfname):
            continue

        fp = tables.openFile(rfname)
        try:
            _x = fp.root.train_ll.col('n')
            _y = fp.root.train_ll.col('train_ll')
            _vlogz = fp.root.var_logz.col('var_logz')
        finally:
            # release the file even if one of the columns cannot be read
            fp.close()

        if smoothing:
            # assumes var_logz is aligned entry-by-entry with train_ll -- TODO confirm
            idx = numpy.where(_vlogz < 50.)[0]
            x[i, idx] = _x[idx]
            y[i, idx] = _y[idx]
        else:
            x[i, :len(_x)] = _x
            y[i, :len(_y)] = _y

    print('**** prefix=%s nh=%i lr_num=%s lr_denum=%s ******' % (prefix, nh, lr_num, lr_denum))
    print(nanmean(y, axis=0))

    xmean = nanmean(x, axis=0)
    ymean = nanmean(y, axis=0)
    ystd  = nanstd(y, axis=0)
    ystd[numpy.isnan(ystd)] = 0.
    idx =  ~numpy.isnan(xmean)
    return [xmean[idx], ymean[idx], ystd[idx]]
예제 #10
0
def runstats(x,n):
# Stephanie Gagne, UHel, 2010
# converted to Python, Dal, 2012
# x is an array of 1 dimension (a 2D array with a single row or column is also reshaped to a vector).
# n is the number of points taken in the running statistic
    """Takes data and the number of points for the running mean/standard deviation, and returns [running mean, running standard deviation].
    Both outputs are computed along axis 1 of an internal window matrix with one row per element of x and n columns."""
    # Make sure x has a mask attribute: anything without one is converted to a masked array with an all-zero mask.
    try: x.mask
    except: 
        x=ma.asanyarray(x); 
        x.mask=ones(np.shape(x))*False
    # A 1D shape cannot be unpacked into two values; in that case treat it as a single column.
    try: [ro,co]=np.shape(x)
    except: ro=np.shape(x)[0]; co=1
    if ro==1 or co==1: x=x.reshape(max(ro,co),)
    # NOTE(review): a non-vector input only prints this message; execution continues with the unreshaped array.
    else: print("The array must be a vector (one column or row)")
    # initializing matrix
    # M starts as all NaN; row i is filled below with the window around x[i], and positions falling outside x stay NaN.
    ro=max(ro,co)
    M=ones([ro,n])*NaN;
    M=ma.asanyarray(M)
    
    # building matrix
    if n%2==1:       # if n is odd
        # columns 0 .. int(n/2)-1: x shifted forward by j, i.e. M[i,posi]=x[i+j] for j=int(n/2)..1
        for j in range(int(n/2),0,-1):
            posi=int(n/2)-j       # current position
            M[0:ro-j,posi]=x[j:]
        # columns int(n/2) .. n-1: x shifted backward by j-1, i.e. M[i,posi]=x[i-(j-1)]; row i therefore spans x[i-int(n/2)] .. x[i+int(n/2)]
        for j in range(1,2+int(n/2),1):
            posi=int(n/2)+j-1;
            M[j-1:,posi]=x[0:(ro+1)-j]
    elif n%2==0:        # if n is even
        # NOTE(review): n/2 is passed to range() and used as an index, so this branch relies on integer division (Python 2 semantics for int n).
        # columns 0 .. n/2-1: M[i,posi]=x[i+j] for j=n/2..1
        for j in range(n/2,0,-1):
            posi=n/2-j
            M[0:ro-j,posi]=x[j:]
        # columns n/2 .. n-1: M[i,posi]=x[i-(j-1)]; row i spans x[i-n/2+1] .. x[i+n/2], one more point ahead than behind
        for j in range(1,n/2+1):
            posi=n/2+j-1;
            M[j-1:,posi]=x[0:(ro+1)-j]
    # reached only when n%2 is neither 1 nor 0
    else: print("Well, that's pretty weird. Are you sure n is an integer?")  
    
    # Write NaN into the underlying data wherever M is masked; presumably so that the nan-aware statistics below skip masked samples -- TODO confirm.
    M.data[M.mask]=NaN
    ave=st.nanmean(M, axis=1);
    stde=st.nanstd(M, axis=1);
    return [ave, stde]      
예제 #11
0
	def addDescriptives(self):

		"""
		Fills the last two cells of every data row and data column of the
		PivotMatrix with an average and an error, and stores the grand
		average and standard deviation in the bottom-right corner.

		Row results are also kept in `rowMeans` / `rowStds`, column results
		in `colMeans` / `colErrs`. The column error depends on `self.err`:
		'95ci' (1.96 * se), 'se' (std / sqrt(size)) or 'std'. Any other value
		raises an Exception.
		"""

		# Rows: mean and standard deviation over the data cells, i.e.
		# everything after the column headers except the last two cells.
		self.rowMeans = []
		self.rowStds = []
		for offset in range(self.nRows):
			line = self.m[self.rowHeaders+offset]
			cells = line[self.colHeaders:-2]
			average = nanmean(cells, axis=None)
			spread = nanstd(cells, axis=None)
			self.rowMeans.append(average)
			self.rowStds.append(spread)
			line[-2] = average
			line[-1] = spread

		# Columns: same idea on the axis-swapped matrix. The mean is written
		# before the error type is checked.
		transposed = self.m.swapaxes(0,1)
		self.colMeans = []
		self.colErrs = []
		for offset in range(self.nCols):
			line = transposed[self.colHeaders+offset]
			cells = line[self.rowHeaders:-2]
			average = nanmean(cells, axis=None)
			line[-2] = average
			if self.err not in ('95ci', 'se', 'std'):
				raise Exception('Err keyword must be "95ci", "se", or "std"')
			error = nanstd(cells, axis=None)
			if self.err != 'std':
				# standard error; widened to a 95% interval if requested
				error = error/np.sqrt(cells.size)
				if self.err == '95ci':
					error = error*1.96
			line[-1] = error
			self.colMeans.append(average)
			self.colErrs.append(error)

		# Grand average and standard deviation over all data cells
		body = self.m[self.rowHeaders:-2, self.colHeaders:-2]
		self.m[-2,-2] = nanmean(body, axis=None)
		self.m[-1,-1] = nanstd(body, axis=None)
예제 #12
0
            csv_writer = csv.writer(csvfile, delimiter=',', quotechar='|')
            csv_writer.writerow(hot[beginning_time:end_time])

        with open('volume all 394 gp data.csv', 'ab') as csvfile:
            csv_writer = csv.writer(csvfile, delimiter=',', quotechar='|')
            csv_writer.writerow(gp[beginning_time:end_time])

        #Append data
        hot_total_volume.append(hot[beginning_time:end_time])
        gp_total_volume.append(gp[beginning_time:end_time])

    date_1 += timedelta(days=1)

#Average volume at each time across all days
# hot_total_volume / gp_total_volume hold one appended slice per pass of the
# loop above; the statistics are taken along axis 0, i.e. across those slices.
# NOTE: despite their names, the variance_* arrays hold the result of
# sci.nanstd (a standard deviation), not a variance.
average_hot_volume = sci.nanmean(hot_total_volume, axis=0)
variance_hot_volume = sci.nanstd(hot_total_volume, axis=0)
average_gp_volume = sci.nanmean(gp_total_volume, axis=0)
variance_gp_volume = sci.nanstd(gp_total_volume, axis=0)

#Group into 3 minute increments (6 x 30 seconds)
# Every window of `resolution` consecutive entries is overwritten, in place,
# with sum(window)/resolution. A final window shorter than `resolution` is
# still divided by `resolution`.
k = 0
resolution = 6
while k in range(0, len(average_hot_volume)):
    average_hot_volume[k:k + resolution] = sum(
        average_hot_volume[k:k + resolution]) / resolution
    variance_hot_volume[k:k + resolution] = sum(
        variance_hot_volume[k:k + resolution]) / resolution
    average_gp_volume[k:k + resolution] = sum(
        average_gp_volume[k:k + resolution]) / resolution
    variance_gp_volume[k:k + resolution] = sum(
        variance_gp_volume[k:k + resolution]) / resolution
    # NOTE(review): no update of k is visible in this excerpt; the loop needs
    # k to advance by `resolution` to terminate -- confirm against the full file.
예제 #13
0
            csv_writer.writerow(hot[beginning_time:end_time])

        with open('volume all 394 gp data.csv', 'ab') as csvfile:
            csv_writer = csv.writer(csvfile, delimiter=',',quotechar='|')
            csv_writer.writerow(gp[beginning_time:end_time])

        #Append data    
        hot_total_volume.append(hot[beginning_time:end_time])
        gp_total_volume.append(gp[beginning_time:end_time])

    date_1 += timedelta(days=1)


#Average volume at each time across all days
# hot_total_volume / gp_total_volume hold one appended slice per pass of the
# loop above; the statistics are taken along axis 0, i.e. across those slices.
# NOTE: despite their names, the variance_* arrays hold the result of
# sci.nanstd (a standard deviation), not a variance.
average_hot_volume = sci.nanmean(hot_total_volume,axis=0)
variance_hot_volume = sci.nanstd(hot_total_volume,axis=0)
average_gp_volume = sci.nanmean(gp_total_volume,axis=0)
variance_gp_volume = sci.nanstd(gp_total_volume,axis=0)

#Group into 3 minute increments (6 x 30 seconds)
# Every window of `resolution` consecutive entries is overwritten, in place,
# with sum(window)/resolution. A final window shorter than `resolution` is
# still divided by `resolution`.
k=0
resolution = 6
# k advances by `resolution` each pass; the loop ends once k is no longer a valid index.
while k in range(0,len(average_hot_volume)):
    average_hot_volume[k:k+resolution] = sum(average_hot_volume[k:k+resolution])/resolution
    variance_hot_volume[k:k+resolution] = sum(variance_hot_volume[k:k+resolution])/resolution
    average_gp_volume[k:k+resolution] = sum(average_gp_volume[k:k+resolution])/resolution
    variance_gp_volume[k:k+resolution] = sum(variance_gp_volume[k:k+resolution])/resolution
    k+=resolution

#Write out averaged 3 minute data
with open('volume 394.csv', 'ab') as csvfile:
예제 #14
0
def stddev(x):
    """ std\{%s\} := Standard deviation of %s """
    # NOTE(review): the docstring above looks like a format template that is
    # filled in elsewhere, so it is kept verbatim -- confirm before editing.
    from scipy.stats.stats import nanstd as _nan_std

    # x is first passed through notnone() (presumably dropping None entries),
    # then handed to scipy's nanstd.
    return _nan_std(notnone(x))
예제 #15
0
def stddev(x):
    """ std\{%s\} := Standard deviation of %s """
    # NOTE(review): the docstring above looks like a format template that is
    # filled in elsewhere, so it is kept verbatim -- confirm before editing.
    from scipy.stats.stats import nanstd as _nan_std
    # x is first passed through notnone() (presumably dropping None entries),
    # then handed to scipy's nanstd.
    return _nan_std(notnone(x))
예제 #16
0
파일: vpinfo.py 프로젝트: StephGagne/SAMAC
def vpinfo(CL,param,base='bg'):
    """ This method returns information on the chosen parameter in the cloud for every vertical scan (every entry of CL.times["verticloud"]). The samples are selected both by altitude and by the time span of the particular vertical scan.
        Options:
            param: title of the parameter as found in CloudObj.dttl or CloudObj.extrattl (matched case-insensitively; non-string values are converted with str()).
            base: method to find the cloud base and top. Default is best guess (defBGheight) base='bg', which reads CL.props["BGheight"]; to use the 4-point method (defheight) use base='4point', which reads CL.props["height"].
        The parameter is first looked up in the basic data (CL.dttl). If it is not found there, the extra datasets (CL.extrattl) are searched and the data are linearly interpolated onto the basic time vector, restricted to the time span common to both.
        Returns H (dictionary of lists, one element per vertical scan; all entries are nan for a scan without in-cloud samples):
        H["bottom"]: parameter at the cloud base (median)
        H["top"]: parameter at the cloud top (median)
        H["mean"]: mean parameter through the cloud
        H["median"]: median parameter through the cloud
        H["minimum"]: minimum parameter through the cloud
        H["maximum"]: maximum parameter through the cloud
        H["stdev"]: standard deviation of the parameter through the cloud
        H["delta"]: difference of parameter between the bottom and the top (bottom minus top)
        H["slope"]: delta divided by the mean thickness
        H["units"]: units of the parameter
        An empty dictionary is returned when the parameter is found more than once in the basic data, or is not found exactly once in the extra data. If anything raises while the scans are processed, a message is printed and H is returned with whatever had been appended up to that point. """
    if type(param)==str: pass
    else: param=str(param)
    H=dict()
    # Row indices of the altitude and time columns in the basic data.
    altp=[i for i,x in enumerate(CL.dttl) if x == 'altitude'][0]
    tim=[i for i,x in enumerate(CL.dttl) if x == 'time'][0]
    T=[i for i,x in enumerate(CL.dttl) if x.lower() == param.lower()]
    if len(T)==1: 
        # parameter found in the basic data: it already shares the basic time vector
        T=T[0]
        Td=CL.data[T]
        Tunits=CL.dunit[T]
        alt=CL.data[altp]
        ta=CL.data[tim]
    elif len(T)>1: print("[vpinfo] Parameter %s was found multiple times in the basic data." %(param)); return dict()
    elif len(T)==0:
        # posx collects [dataset index, column index] for every match in the extra data
        posx=[] 
        for i,ttl in enumerate(CL.extrattl):     # for all extra datasets available
            posx=posx+[[i,j] for j,x in enumerate(ttl) if x.lower() == param.lower()]    # check all titles matching with temperature
        if len(posx)==1: 
            Td=CL.extradata[posx[0][0]][posx[0][1]]    # loading the data
            Tunits=CL.extraunit[posx[0][0]][posx[0][1]]
            # The time column must be looked up in the titles of the dataset that holds the parameter (posx[0][0]),
            # not in those of the last dataset visited by the search loop above.
            j=[j for j,x in enumerate(CL.extrattl[posx[0][0]]) if x.lower() == 'time'][0]
            Tt=CL.extradata[posx[0][0]][j]     # loading associated time stamp
            # adapting for too short data for interpolation
            if len(Tt)<2: Td=np.ones((2,))*NaN; Tt=np.array([CL.times["cloud"][0][0],CL.times["cloud"][0][1]]);
            # adapting the time vector to a common time vector
            ta1=np.max([CL.data[tim][0],Tt[0]]); ta2=np.min([CL.data[tim][-1],Tt[-1]]);
            ta=CL.data[tim][nonzero((CL.data[tim]>=ta1)*(CL.data[tim]<=ta2))[0]]
            alt=CL.data[altp][nonzero((CL.data[tim]>=ta1)*(CL.data[tim]<=ta2))[0]]
            fT=interpolate.interp1d(Tt,Td,kind='linear')
            Td=fT(ta)
        else: print("[vpinfo] No or multiple %s found in the basic or the extra data." %(param)); return dict()
    
    H["bottom"]=list(); H["top"]=list(); H["mean"]=list(); H["median"]=list(); H["stdev"]=list(); H["delta"]=list(); H["slope"]=list(); H["units"]=list(); H["minimum"]=list();  H["maximum"]=list();
    try:
        for i in range(len(CL.times["verticloud"])):
            # cb / ct: cloud base and cloud top altitude for this scan
            if base=='4point': cb=CL.props["height"][i][1]; ct=CL.props["height"][i][2];
            else: cb=CL.props["BGheight"][i][0]; ct=CL.props["BGheight"][i][1];
            # in-cloud samples of this scan: between base and top, and within the scan's time span
            ix=nonzero((alt>=cb)*(alt<=ct)*(ta>=CL.times["verticloud"][i][0])*(ta<=CL.times["verticloud"][i][1]))[0]
            if len(ix)==0:
                H["mean"].append(nan); H["median"].append(nan); H["stdev"].append(nan); H["minimum"].append(nan); H["maximum"].append(nan); H["top"].append(nan); H["bottom"].append(nan); H["delta"].append(nan); H["slope"].append(nan); H["units"].append(nan)
            else:
                H["mean"].append(float(st.nanmean(Td[ix])))
                H["median"].append(float(st.nanmedian(Td[ix])))
                H["stdev"].append(float(st.nanstd(Td[ix])))
                H["minimum"].append(float(np.nanmin(Td[ix])))
                H["maximum"].append(float(np.nanmax(Td[ix])))
                if base=='4point': 
                    # top layer: between ct and the 4th height bound; base layer: between the 1st height bound and cb
                    if len(nonzero((alt>=ct)*(alt<=CL.props["height"][i][3])*(ta>=CL.times["verticloud"][i][0])*(ta<=CL.times["verticloud"][i][1]))[0])==0: H["top"].append(nan)
                    else: H["top"].append(float(st.nanmedian(Td[nonzero((alt>=ct)*(alt<=CL.props["height"][i][3])*(ta>=CL.times["verticloud"][i][0])*(ta<=CL.times["verticloud"][i][1]))])))
                    if len(nonzero((alt>=CL.props["height"][i][0])*(alt<=cb)*(ta>=CL.times["verticloud"][i][0])*(ta<=CL.times["verticloud"][i][1]))[0])==0: H["bottom"].append(nan)
                    else: H["bottom"].append(float(st.nanmedian(Td[nonzero((alt>=CL.props["height"][i][0])*(alt<=cb)*(ta>=CL.times["verticloud"][i][0])*(ta<=CL.times["verticloud"][i][1]))])))
                    H["delta"].append(H["bottom"][i]-H["top"][i])
                    # thickness is measured between the midpoints of the top layer and of the base layer
                    H["slope"].append(H["delta"][i]/(np.mean([ct, CL.props["height"][i][3]])-np.mean([CL.props["height"][i][0], cb])))
                else: 
                    R=10     # plus/minus R meters around the cloud top
                    if len(nonzero((alt>=ct-R)*(alt<=ct+R)*(ta>=CL.times["verticloud"][i][0])*(ta<=CL.times["verticloud"][i][1]))[0])==0: H["top"].append(nan)
                    else: H["top"].append(float(st.nanmedian(Td[nonzero((alt>=ct-R)*(alt<=ct+R)*(ta>=CL.times["verticloud"][i][0])*(ta<=CL.times["verticloud"][i][1]))])))
                    # the same +/- R window is used around the cloud base
                    if len(nonzero((alt>=cb-R)*(alt<=cb+R)*(ta>=CL.times["verticloud"][i][0])*(ta<=CL.times["verticloud"][i][1]))[0])==0: H["bottom"].append(nan)
                    else: H["bottom"].append(float(st.nanmedian(Td[nonzero((alt>=cb-R)*(alt<=cb+R)*(ta>=CL.times["verticloud"][i][0])*(ta<=CL.times["verticloud"][i][1]))])))
                    H["delta"].append(H["bottom"][i]-H["top"][i])
                    H["slope"].append(float(H["delta"][i]/(ct-cb)))
                H["units"].append(Tunits)
            del ix
    # NOTE(review): the bare except reports every failure as "heights not defined", not only missing CL.props entries.
    except: 
        if base=='4point': print("[vpinfo] Height properties must be defined first using the defheight method.")
        else: print("[vpinfo] Height properties must be defined first using the defBGheight method.")
    return H