Example #1
def cinfo(CL,param):
    """ This property returns information on the parameter in the cloud (all given in the units of the parameter). Note that the parameter is averaged over the entire cloud time at the altitude required (bottom, top or in-cloud) - not the case using vpinfo(CL,param).
        CloudObj.cinfo["bottom"]: param at the cloud base
        CloudObj.cinfo["top"]: param at the cloud top
        CloudObj.cinfo["mean"]: mean param through the cloud (in cloud)
        CloudObj.cinfo["median"]: median param through the cloud (in cloud)
        CloudObj.cinfo["stdev"]: standard deviation of the param through the cloud (in cloud)
        CloudObj.cinfo["delta"]: difference of param between the bottom and the top
        CloudObj.cinfo["slope"]: delta divided by the mean thickness
        The property can be accessed as e.g. CloudObj.cinfo["bottom"] or CloudObj.cinfo (dictionary) """
    H=dict()
    H["bottom"]=list(); H["top"]=list(); H["mean"]=list(); H["median"]=list(); H["stdev"]=list(); H["delta"]=list(); H["slope"]=list(); H["units"]=list(); 
    alt=[i for i,x in enumerate(CL.dttl) if x == 'altitude'][0]
    T=[i for i,x in enumerate(CL.dttl) if x == param][0]
    try:
        for i in range(len(CL.props["height"])):
            ix=nonzero((CL.data[alt]>=CL.props["height"][i][1])*(CL.data[alt]<=CL.props["height"][i][2]))
            H["bottom"].append(float(st.nanmedian(CL.data[T][nonzero((CL.data[alt]>=CL.props["height"][i][0])*(CL.data[alt]<=CL.props["height"][i][1]))])))
            H["top"].append(float(st.nanmedian(CL.data[T][nonzero((CL.data[alt]>=CL.props["height"][i][2])*(CL.data[alt]<=CL.props["height"][i][3]))])))
            H["mean"].append(float(st.nanmean(CL.data[T][ix])))
            H["median"].append(float(st.nanmedian(CL.data[T][ix])))
            H["stdev"].append(float(st.nanstd(CL.data[T][ix])))
            H["delta"].append(H["bottom"][i]-H["top"][i])
            H["slope"].append(H["delta"][i]/(np.mean([CL.props["height"][i][2], CL.props["height"][i][3]])-np.mean([CL.props["height"][i][0], CL.props["height"][i][1]])))     # units/meter
            H["units"].append(CL.dunit[T])
            del ix
    except: print("[cinfo] Height properties must be defined first using the defheight method.")
    return H
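A hypothetical usage sketch (CL is assumed to be a CloudObj with 'altitude' and the requested parameter in CL.dttl, and with heights defined via the defheight method; the parameter name is made up):

H = cinfo(CL, 'temperature')   # or, via the property wiring, CloudObj.cinfo
print(H["bottom"][0], H["top"][0], H["units"][0])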
Example #2
 def getBoxData(self, dataset, plot, coords):
     alldata = dataset.getAggregateDataAsArray(plot)
     box = alldata[coords[0]][coords[1]]
     mean = st.nanmean(box)
     median = st.nanmedian(box)
     sigma = numpy.std(box)
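     # note: numpy.std does not skip NaNs, unlike st.nanmean/st.nanmedian above;
     # numpy's nanstd (or nanstd from the old scipy.stats) would ignore them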
     return box, (mean, median, sigma)
Example #4
def medabsdev(x,axis=0):
    """medabsdev(x,axis=0). Calculates and returns the median absolute deviation (MAD) based on https://en.wikipedia.org/wiki/Median_absolute_deviation (last visited June 2014). This function handles NaNs and masked values (masked arrays) by ignoring them.
    x (input) is the array on which the MAD is calculated.
    axis is the axis along which the MAD is calculated. Default is 0."""
    x=copy.deepcopy(x)
    if 'array' not in str(type(x)).lower(): raise TypeError("x must be an array.")
    if 'ma' in str(type(x)).lower():
        try: x[x.mask]=nan
        except: raise TypeError("Tried to translate masks into NaNs but failed.")
    if axis==0:
        med=st.nanmedian(x)
        mad=st.nanmedian(abs(x-med))
    elif axis==1:
        med=st.nanmedian(x.transpose())
        mad=st.nanmedian(abs(x.transpose()-med))
    else: raise ValueError("axis must be 0 or 1.")
    return mad
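A small usage sketch, assuming numpy is available both via "from numpy import *" and as np, the copy module is imported, and the old scipy.stats.stats module is imported as st, as the function body implies:

x = np.array([[1., 2., np.nan], [4., 5., 6.], [7., 8., 9.]])
print(medabsdev(x))          # column-wise MAD, NaNs ignored
print(medabsdev(x, axis=1))  # row-wise MAD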
Example #5
def fit_spec_poly5(xData, yData, dyData, order=5):
    """
    Fit a 5th order polynomial to a spectrum. To avoid overflow errors the
    X-axis data should not be large numbers (e.g.: x10^9 Hz; use GHz instead).
    """

    # Lower order limit is a line with slope
    if order < 1:
        order = 1
    if order > 5:
        order = 5

    # Estimate starting coefficients
    C1 = nanmean(np.diff(yData)) / nanmedian(np.diff(xData))
    ind = int(np.median(np.where(~np.isnan(yData))))
    C0 = yData[ind] - (C1 * xData[ind])
    C5 = 0.0
    C4 = 0.0
    C3 = 0.0
    C2 = 0.0
    inParms = [{
        'value': C5,
        'parname': 'C5'
    }, {
        'value': C4,
        'parname': 'C4'
    }, {
        'value': C3,
        'parname': 'C3'
    }, {
        'value': C2,
        'parname': 'C2'
    }, {
        'value': C1,
        'parname': 'C1'
    }, {
        'value': C0,
        'parname': 'C0'
    }]

    # Set the polynomial order
    for i in range(len(inParms)):
        if len(inParms) - i - 1 > order:
            inParms[i]['fixed'] = True
        else:
            inParms[i]['fixed'] = False

    # Function to evaluate the difference between the model and data.
    # This is minimised in the least-squared sense by the fitter
    def errFn(p, fjac=None):
        status = 0
        return status, (poly5(p)(xData) - yData) / dyData

    # Use mpfit to perform the fitting
    mp = mpfit(errFn, parinfo=inParms, quiet=True)
    return mp
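poly5 and mpfit are not shown in the original; mpfit is an external Levenberg-Marquardt fitter. A minimal sketch of poly5, consistent with how it is called above and in the variant further below (coefficients ordered highest power first), offered as an assumption rather than the original implementation:

import numpy as np

def poly5(p):
    # return an evaluator for the polynomial with coefficients p,
    # highest-order coefficient first, as np.polyval expects
    return lambda x: np.polyval(p, x)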
Example #6
def getFlatNorm(flat, ysoln, low, high):
    from scipy.stats import stats
    f = getStraight(flat, ysoln, low, False)
    ftmp = numpy.where(f == 0, numpy.nan, f)
    norm = stats.nanmedian(ftmp[20:-20], 0)
    norm = numpy.where((norm <= 0) | numpy.isnan(norm), 1., norm)
    f /= norm
    img = numpy.ones((2400, 2400))
    img[low * 2:high * 2] = st.resampley(f, ysoln[3], mode='nearest')
    return iT.resamp(derotate2(img), 2)
Example #7
def MAD(a, c=0.6745, axis=0):
    """
    Median Absolute Deviation along given axis of an array:
    median(abs(a - median(a))) / c
    """

    a = np.asarray(a, np.float64)
    if a.ndim == 1:
        d = _nanmedian(a)
        m = _nanmedian(np.fabs(a - d) / c)
    else:
        d = nanmedian(a, axis=axis)
        # I don't want the array to change so I have to copy it?
        if axis > 0:
            aswp = np.swapaxes(a, 0, axis)
        else:
            aswp = a
        m = nanmedian(np.fabs(aswp - d) / c, axis=0)

    return m
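A usage sketch; nanmedian and _nanmedian are assumed to come from the old scipy.stats internals, as the calls imply. With the default c=0.6745 the MAD is scaled to estimate the standard deviation of normally distributed data:

a = np.array([1.0, 2.0, 2.5, np.nan, 100.0])   # one outlier, one NaN
print(MAD(a))   # robust spread estimate, barely affected by the outlier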
Example #8
 def aggregate_ftr_matrix(self, ftr_matrix):
     sig = []
     for ftr in ftr_matrix:
         median = stats.nanmedian(ftr)
         mean = stats.nanmean(ftr)
         std = stats.nanstd(ftr)
         # Invalid double scalars warning appears here
         skew = stats.skew(ftr) if any(ftr) else 0.0
         kurtosis = stats.kurtosis(ftr)
         sig.extend([median, mean, std, skew, kurtosis])
     return sig
Example #10
	def collapse(self, keys, vName):

		"""
		desc:
			Collapse the data by a (list of) keys and get statistics on a
			dependent variable.

		arguments:
			keys:
				desc:	A key or list of keys to collapse the data on.
				type:	[list, str, unicode]
			vName:
			desc:	The dependent variable to collapse. Alternatively, you can
						specify a function, in which case the error will be 0.
				type:	[str, unicode, function]

		returns:
			desc:	A DataMatrix with the collapsed data, with the descriptive
					statistics on `vName`.
			type:	DataMatrix
		"""

		if isinstance(keys, basestring):
			keys = [keys]

		m = [keys + ['mean', 'median', 'std', 'se', '95ci', 'count']]
		for g in self.group(keys):
			l = []
			for key in keys:
				l.append(g[key][0])
			if type(vName) == types.FunctionType:
				l.append(vName(g))
				l.append(np.nan)
				l.append(np.nan)
				l.append(np.nan)
				l.append(np.nan)
				l.append(len(g))
			else:
				a = g[vName]
				l.append(nanmean(a))
				l.append(nanmedian(a))
				l.append(nanstd(a))
				l.append(nanstd(a)/np.sqrt(a.size))
				l.append(1.96*nanstd(a)/np.sqrt(a.size))
				l.append(a.size)
			m.append(l)
		return DataMatrix(m)
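A hypothetical call (the DataMatrix instance dm and the column names are assumptions, not from the original):

# collapse response times per condition; columns: mean, median, std, se, 95ci, count
cm = dm.collapse(['condition'], 'response_time')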
Example #11
def run_stats(x,n):
    """runstats(x,n). Calculates and returns the running mean, median, standard deviation, and median absolute deviation (MAD). This function handles NaNs and masked values (masked arrays) by ignoring them.
    x (input) is the array on which the running statistics are calculated (only one dimension, 1D array).
    n is the number of points taken in the running statistics window."""
    x=copy.deepcopy(x)
    try: x.mask
    except: 
        x=np.ma.array(x,mask=False)

    if len(np.shape(x))>2: raise ValueError("The array provided has more than 2 dimensions, at most 1 or 2 dimensions can be handled.")
    try: [ro,co]=np.shape(x)
    except: ro=np.shape(x)[0]; co=1
    if ro==1 or co==1: 
        ro=max(ro,co)
        x=x.reshape(ro,)
    else: raise ValueError("The array must be a vector (one column or row)")
    # initializing matrix
    M=ones([ro,n])*NaN;
    M=ma.asanyarray(M)
    
    # building matrix
    if n%2==1:       # if n is odd
        for j in range(int(n/2),0,-1):
            posi=int(n/2)-j       # current position
            M[0:ro-j,posi]=x[j:]
        for j in range(1,2+int(n/2),1):
            posi=int(n/2)+j-1;
            M[j-1:,posi]=x[0:(ro+1)-j]
    elif n%2==0:        # if n is even
        for j in range(n//2,0,-1):
            posi=n//2-j
            M[0:ro-j,posi]=x[j:]
        for j in range(1,n//2+1):
            posi=n//2+j-1;
            M[j-1:,posi]=x[0:(ro+1)-j]
    else: print("Well, that's pretty weird. Are you sure n is an integer?")  
    
    M.data[M.mask]=nan
    ave=st.nanmean(M, axis=1);
    med=st.nanmedian(M, axis=1);
    stde=st.nanstd(M, axis=1);
    mad=medabsdev(M,axis=1)
    return [ave, med, stde, mad]
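A usage sketch, assuming numpy is imported via "from numpy import *" and as np, and medabsdev is defined as in the earlier example:

x = np.array([1., 2., nan, 4., 5., 100., 7.])
ave, med, stde, mad = run_stats(x, 3)   # 3-point running window, NaNs ignored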
Example #12
def fit_spec_poly5(xData, yData, dyData, order=5):

    xData = np.array(xData, dtype='f8')
    yData = np.array(yData, dtype='f8')
    
    # Estimate starting coefficients
    C1 = nanmean(np.diff(yData)) / nanmedian(np.diff(xData))
    ind = int(np.median(np.where(~np.isnan(yData))))
    C0 = yData[ind] - (C1 * xData[ind])
    if order<1:
        order=1
    if order>5:
        order=5
    p0 = [0.0, 0.0, 0.0, 0.0, C1, C0]

    # Set the order
    p0 = p0[(-order-1):]

    def chisq(p, x, y):
        return np.sum( ((poly5(p)(x) - y)/ dyData)**2.0 )

    # Use minimize to perform the fit
    return op.fmin_bfgs(chisq, p0, args=(xData, yData), full_output=1)
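A hypothetical call (freq_ghz, spec, and spec_err are made-up arrays; assumes numpy as np, scipy.optimize as op, and a poly5 like the sketch after Example #5):

result = fit_spec_poly5(freq_ghz, spec, spec_err, order=3)
best_p = result[0]   # op.fmin_bfgs with full_output=1 returns xopt first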
Example #13
            #===========================================================
            fig = plt.figure(figsize=(3.62, 2.76))
            ax = fig.add_subplot(111)
            data = cdata[prnd,:]
            zenSpacing = 0.5
            median = []
            zen = np.linspace(0,90,int(90./zenSpacing) +1)
            for z in zen :
                criterion = ( (data[:,2] < (z + zenSpacing/2.)) &
                          (data[:,2] > (z - zenSpacing/2.)) )
                ind = np.array(np.where(criterion))[0]
                tmp = data[ind,:]
                rout = esm.reject_outliers_arg(tmp[:,3],3)
                for i in rout :
                    ax.plot(90.- z, tmp[i,3],'k.',alpha=0.5)
                median.append(nanmedian(data[ind,3]))
            ax.plot(90.-zen,median,'r-',alpha=0.5)


            for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
                    ax.get_xticklabels() + ax.get_yticklabels()):
                item.set_fontsize(8)

            ax.set_ylim([-35,35])
            plt.tight_layout()
            plt.savefig(str(prn)+"_ele.png")
            plt.close()
            #================================================
            az = np.linspace(0,360,721)
            fig = plt.figure(figsize=(3.62, 2.76))
Example #14
# Commonly used statistical tools in Python include NumPy, Pandas, PyMC, StatsModels, etc.
# The scipy.stats submodule of SciPy contains many statistical methods.
from numpy import *
from matplotlib import pyplot

# NumPy has simple built-in statistical methods:
heights = array([1.46, 1.79, 2.01, 1.75, 1.56, 1.69, 1.88, 1.76, 1.88, 1.78])
print('mean,', heights.mean())
print('min,', heights.min())
print('max', heights.max())
print('standard deviation,', heights.std())

# Import SciPy's statistics module:
import scipy.stats.stats as st

print('median, ', st.nanmedian(heights))  # median ignoring NaN values
print('mode, ', st.mode(heights))  # mode and its count
print('skewness, ', st.skew(heights))  # skewness
print('kurtosis, ', st.kurtosis(heights))  # kurtosis

# Probability distributions
# Common continuous probability distributions include:
# uniform
# normal
# Student's t
# F
# Gamma
# ...

# Discrete probability distributions:
# Bernoulli
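The note stops before showing any distribution code; a brief added illustration of the scipy.stats distribution API:

from scipy.stats import norm, bernoulli

print(norm.pdf(0))            # density of the standard normal at 0
print(norm.cdf(1.96))         # cumulative probability, about 0.975
print(norm.rvs(size=5))       # five random draws
print(bernoulli.pmf(1, 0.3))  # P(X=1) for a Bernoulli(0.3)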
Example #15
def transect_flow_estimator(pre_process_input_file=None):
    """
    Receives a list of ADCPTransectData objects from transect_preprocessor.py,
    processes them to find average velocities according to options, and finally
    outputs flows. Flows can be written to CSV and/or plotted across time, and
    individual plots are produced for each transect.  Output files are saved to
    the outpath supplied by transect_preprocessor().
    Inputs:
        pre_process_input_file = path to a transect_preprocessor input file,
          or None to use the default file.
    """

    # initialize collector lists for time series plots
    mtimes = []
    mean_u = []
    mean_v = []
    align_angle = []
    total_flow = []
    ustars = []
    kxs = []
    kys = []
    data_file_num = -1

    # setup compression option
    if flow_netcdf_data_compression:
        zlib = True
    else:
        zlib = None

    # prepare plot line, if any
    if flow_p1_lon_lat is None or flow_p2_lon_lat is None:
        ll_pline = None
    else:
        ll_pline = np.array([flow_p1_lon_lat, flow_p2_lon_lat])

    (transects, outpath
     ) = transect_preprocessor.transect_preprocessor(pre_process_input_file)

    if flow_write_timeseries_flows_to_csv:
        logfname = os.path.join(outpath, 'transect_flow_log.csv')
        lf = open(logfname, 'wb')
        logfile = csv.writer(lf)
        logfile.writerow(
            ('filename', 'start_date', 'end_date', 'water_mode', 'bottom_mode',
             'mean_velocity_U [m/s]', 'mean_velocity_V [m/s]',
             'flow_volume_U [m^3/s]', 'flow_volume_V [m^3/s]',
             'sampling_area [m^2]', 'alignment_angle [degree]', 'notes'))

    for t in transects:

        fname, ext = os.path.splitext(t.source)
        outname = os.path.join(outpath, fname)

        if flow_regrid:

            if ll_pline is not None:
                if flow_regrid_normal_to_flow:
                    print "Warning! Regridding plotline given, but options also ask for a flow-based plotline."
                    print "Ignoring flow-based plotline option..."
                if t.xy_srs is not None:
                    ll_srs = t.lonlat_srs
                    if ll_srs is None:
                        ll_srs = t.default_lonlat_srs
                    pline = adcpy.util.coordinate_transform(
                        ll_pline, ll_srs, t.xy_srs)
                else:
                    print "ADCPData must be projected to use transect_flow_estimator"
                    exit()
            else:
                if flow_regrid_normal_to_flow:
                    flows = t.calc_ensemble_flows(range_from_velocities=True)
                    pline = adcpy.util.map_flow_to_line(
                        t.xy, flows[:, 0], flows[:, 1])
                else:
                    pline = adcpy.util.map_xy_to_line(t.xy)
            t.xy_regrid(dxy=flow_regrid_dxy,
                        dz=flow_regrid_dz,
                        pline=pline,
                        sd_drop=flow_regrid_bin_sd_drop,
                        mtime_regrid=True)

        else:
            t = t.average_ensembles(flow_crossprod_ens_num_ave)

        if flow_sd_drop > 0:
            t.sd_drop(sd=flow_sd_drop,
                      sd_axis='elevation',
                      interp_holes=flow_sd_drop_interp)
            t.sd_drop(sd=flow_sd_drop,
                      sd_axis='ensemble',
                      interp_holes=flow_sd_drop_interp)
        if flow_smooth_kernel > 2:
            t.kernel_smooth(kernel_size=flow_smooth_kernel)
        if flow_extrapolate_boundaries:
            t.extrapolate_boundaries()
        if flow_rotation is not None:
            t = adcpy_recipes.transect_rotate(t, flow_rotation)

        if flow_regrid:
            UVW,UVWens,flow,survey_area = \
              adcpy_recipes.calc_transect_flows_from_uniform_velocity_grid(t,use_grid_only=False)
            Uflow = flow[0]
            Vflow = flow[1]
            U = UVW[0]
            V = UVW[1]
        else:
            U, Uflow, total_area, survey_area = t.calc_crossproduct_flow()
            V, Vflow = (np.nan, np.nan)

        if flow_plot_mean_vectors:
            fig3 = adcpy.plot.plot_ensemble_mean_vectors(
                t, title='Mean Velocity [m/s]')
            if flow_save_plots:
                adcpy.plot.plt.savefig("%s_mean_velocity.png" % outname)

        if flow_plot_flow_summmary:
            fig6 = adcpy.plot.plot_flow_summmary(t,
                                                 title='Streamwise Summary',
                                                 ures=0.1,
                                                 vres=0.1,
                                                 use_grid_flows=flow_regrid)
            if flow_save_plots:
                adcpy.plot.plt.savefig("%s_flow_summary.png" % outname)

        if flow_show_plots:
            adcpy.plot.show()
        adcpy.plot.plt.close('all')

        if (flow_save_processed_netcdf):
            fname = outname + '.flow_processed.nc'
            t.write_nc(fname, zlib=zlib)

        if flow_plot_timeseries_values or flow_write_timeseries_flows_to_csv:

            data_file_num += 1

            # must fit to line to calc dispersion
            xy_line = adcpy.util.map_xy_to_line(t.xy)
            xd, yd, dd, xy_line = adcpy.util.find_projection_distances(
                t.xy, xy_line)
            ustar, kx, ky = adcpy.util.calcKxKy(t.velocity[:, :,
                                                           0], t.velocity[:, :,
                                                                          1],
                                                dd, t.bin_center_elevation,
                                                t.bt_depth)

            if t.rotation_angle is not None:
                r_angle = t.rotation_angle * 180.0 / np.pi
            else:
                r_angle = 0.0

            if flow_write_timeseries_flows_to_csv:
                times = t.date_time_str(filter_missing=True)
                try:
                    w_mode_str = '%s' % t.raw_adcp.config.prof_mode
                    bm = t.raw_adcp.bt_mode.tolist()
                    b_mode = []
                    for bm1 in bm:
                        bmi = int(bm1)
                        if (bmi not in b_mode):
                            b_mode.append(bmi)
                    b_mode_str = ''
                    for bm1 in b_mode:
                        if (b_mode_str == ''):
                            b_mode_str = '%i' % bm1
                        else:
                            b_mode_str = '%s/%i' % (b_mode_str, bm1)
                except:
                    w_mode_str = 'Unknown'
                    b_mode_str = 'Unknown'
                logfile.writerow(
                    (t.source, times[0], times[-1], w_mode_str, b_mode_str,
                     '%7.4f' % U, '%7.4f' % V, '%10.2f' % Uflow,
                     '%10.2f' % Vflow, '%10.2f' % survey_area,
                     '%5.2f' % r_angle, t.history))

                if flow_plot_timeseries_values:
                    mtimes.append(sp.nanmedian(t.mtime))
                    mean_u.append(U)
                    mean_v.append(V)
                    total_flow.append(Uflow)
                    align_angle.append(r_angle)
                    ustars.append(ustar)
                    kxs.append(kx)
                    kys.append(ky)

    # plot timeseries data after all files have been processed
    if flow_plot_timeseries_values and data_file_num > 0:

        # sort by mtime
        mtimes = np.array(mtimes)
        nn = np.argsort(mtimes)
        mtimes = mtimes[nn]
        mean_u = np.array(mean_u)[nn]
        mean_v = np.array(mean_v)[nn]
        total_flow = np.array(total_flow)[nn]
        align_angle = np.array(align_angle)[nn]
        ustars = np.array(ustars)[nn]
        kxs = np.array(kxs)[nn]
        kys = np.array(kys)[nn]

        # plot timeseries figures
        fig_handle = plt.figure()
        plt.subplot(311)
        align_angle = np.array(align_angle)
        mtimes = np.array(mtimes)
        mean_u = np.array(mean_u)
        mean_v = np.array(mean_v)
        total_flow = np.array(total_flow)
        aa = -align_angle * np.pi / 180.0
        if np.isnan(mean_v[0]):
            # probably crossproduct flows were calculated, which has zero v flow
            mean_v[:] = 0.0
        uq = np.cos(aa) * mean_u + np.sin(aa) * mean_v
        vq = -np.sin(aa) * mean_u + np.cos(aa) * mean_v
        v_mag = np.sqrt(uq**2 + vq**2)
        vScale = np.max(v_mag)
        vScale = max(vScale, 0.126)
        qk_value = np.round(vScale * 4) / 4
        Q = plt.quiver(
            mtimes,
            np.zeros(len(mtimes)),
            uq,
            vq,
            width=0.003,
            headlength=10,
            headwidth=7,
            scale=10 * vScale,  #scale = 0.005,
            scale_units='width')
        qk = plt.quiverkey(
            Q,
            0.5,
            0.85,
            qk_value,
            r'%3.2f ' % qk_value + r'$ \frac{m}{s}$',
            labelpos='W',
        )
        plt.title('Time series data: %s' % outpath)
        ax = plt.gca()
        ax.xaxis_date()
        plt.gcf().autofmt_xdate()
        ax.yaxis.set_visible(False)
        ax.set_xticklabels([])
        plt.subplot(312)
        plt.plot(mtimes, total_flow)
        plt.ylabel('m^3/s')
        ax = plt.gca()
        ax.xaxis_date()
        plt.gcf().autofmt_xdate()
        ax.set_xticklabels([])
        plt.subplot(313)
        plt.plot(mtimes, align_angle, 'bo')
        plt.ylabel('rotation angle')
        ax = plt.gca()
        ax.xaxis_date()
        plt.gcf().autofmt_xdate()
        ts_plot = os.path.join(outpath, 'time_series_plots.png')
        fig_handle.savefig(ts_plot)

        fig_handle = plt.figure(1111)
        plt.subplot(311)
        plt.plot(mtimes, ustars)
        plt.ylabel('u* m/s')
        ax = plt.gca()
        ax.xaxis_date()
        #plt.gcf().autofmt_xdate()
        ax.set_xticklabels([])
        plt.title('Dispersion Coefficients: %s' % outpath)
        plt.subplot(312)
        plt.plot(mtimes, kxs)
        plt.ylabel('Kx m^2/s')
        ax = plt.gca()
        ax.xaxis_date()
        #plt.gcf().autofmt_xdate()
        ax.set_xticklabels([])
        plt.subplot(313)
        plt.plot(mtimes, kys, 'b')
        plt.ylabel('Ky m^2/s')
        ax = plt.gca()
        ax.xaxis_date()
        plt.gcf().autofmt_xdate()
        ts_plot = os.path.join(outpath, 'time_series_dispersion.png')
        fig_handle.savefig(ts_plot)
        plt.close('all')

    if flow_write_timeseries_flows_to_csv:
        lf.close()

    print 'transect_flow_estimator completed!'
Example #16
def transect_flow_estimator(pre_process_input_file=None):
    """
    Receives a list of ADCPTransectData objects from transect_preprocessor.py,
    processes them to find average velocities according to options, and finally
    outputs flows. Flows can be written to CSV and/or plotted across time, and
    individual plots are produced for each transect.  Output files are saved to
    the outpath supplied by transect_preprocessor().
    Inputs:
        pre_process_input_file = path to a transect_preprocessor input file,
          or None to use the default file.
    """


    # initialize collector lists for time series plots
    mtimes = []
    mean_u = []
    mean_v = []
    align_angle = []
    total_flow = []
    ustars = []
    kxs = []
    kys = []
    data_file_num = -1

    # setup compression option
    if flow_netcdf_data_compression:
        zlib = True
    else:
        zlib = None
        
    # prepare plot line, if any
    if flow_p1_lon_lat is None or flow_p2_lon_lat is None:
        ll_pline = None
    else:
        ll_pline = np.array([flow_p1_lon_lat,flow_p2_lon_lat])

    (transects,outpath) = transect_preprocessor.transect_preprocessor(pre_process_input_file)

    if flow_write_timeseries_flows_to_csv:
        logfname = os.path.join(outpath,'transect_flow_log.csv')
        lf = open(logfname,'wb')
        logfile = csv.writer(lf)
        logfile.writerow(('filename','start_date','end_date','water_mode',
                         'bottom_mode',
                         'mean_velocity_U [m/s]',
                         'mean_velocity_V [m/s]',
                         'flow_volume_U [m^3/s]','flow_volume_V [m^3/s]',
                         'sampling_area [m^2]',
                         'alignment_angle [degree]',
                         'notes'))
    
    for t in transects:
        
        fname, ext = os.path.splitext(t.source)
        outname = os.path.join(outpath,fname)
        
        if flow_regrid:

            if ll_pline is not None:
                if flow_regrid_normal_to_flow:
                    print "Warning! Regridding plotline given, but options also ask for a flow-based plotline."
                    print "Ignoring flow-based plotline option..."
                if t.xy_srs is not None:
                    ll_srs = t.lonlat_srs
                    if ll_srs is None:
                        ll_srs = t.default_lonlat_srs            
                    pline = adcpy.util.coordinate_transform(ll_pline,ll_srs,t.xy_srs)
                else:
                    print "ADCPData must be projected to use transect_flow_estimator"
                    exit()
            else:
                if flow_regrid_normal_to_flow:
                    flows = t.calc_ensemble_flows(range_from_velocities=True)
                    pline = adcpy.util.map_flow_to_line(t.xy,flows[:,0],flows[:,1])
                else:
                    pline = adcpy.util.map_xy_to_line(t.xy)                    
            t.xy_regrid(dxy=2.0,dz=0.25,
                        pline=pline,
                        sd_drop=flow_regrid_bin_sd_drop,
                        mtime_regrid=True)
        
        else:
            t = t.average_ensembles(flow_crossprod_ens_num_ave)
            
        if flow_sd_drop > 0:
            t.sd_drop(sd=flow_sd_drop,
                      sd_axis='elevation',
                      interp_holes=flow_sd_drop_interp)
            t.sd_drop(sd=flow_sd_drop,
                      sd_axis='ensemble',
                      interp_holes=flow_sd_drop_interp)
        if flow_smooth_kernel > 2:
            t.kernel_smooth(kernel_size = flow_smooth_kernel)
        if flow_extrapolate_boundaries:
            t.extrapolate_boundaries()
        if flow_rotation is not None:
            t = adcpy_recipes.transect_rotate(t,flow_rotation)
            
        if flow_regrid:
            UVW,UVWens,flow,survey_area = \
              adcpy_recipes.calc_transect_flows_from_uniform_velocity_grid(t,use_grid_only=False)
            Uflow = flow[0]
            Vflow = flow[1]
            U = UVW[0]
            V = UVW[1]
        else:
            U,Uflow,total_area,survey_area = t.calc_crossproduct_flow()
            V, Vflow = (np.nan,np.nan)
            
        if flow_plot_mean_vectors:
            fig3 = adcpy.plot.plot_ensemble_mean_vectors(t,title='Mean Velocity [m/s]')
            if flow_save_plots:
                adcpy.plot.plt.savefig("%s_mean_velocity.png"%outname)
            
        if flow_plot_flow_summmary:
            fig6 = adcpy.plot.plot_flow_summmary(t,title='Streamwise Summary',
                                                 ures=0.1,vres=0.1,
                                                 use_grid_flows=flow_regrid)
            if flow_save_plots:
                adcpy.plot.plt.savefig("%s_flow_summary.png"%outname)

        if flow_show_plots:
            adcpy.plot.show()
        adcpy.plot.plt.close('all')
        
        if (flow_save_processed_netcdf):
            fname = outname + '.flow_processed.nc'
            t.write_nc(fname,zlib=zlib)

        if flow_plot_timeseries_values or flow_write_timeseries_flows_to_csv:
            
            data_file_num += 1

            # must fit to line to calc dispersion
            xy_line = adcpy.util.map_xy_to_line(t.xy)
            xd,yd,dd,xy_line = adcpy.util.find_projection_distances(t.xy,xy_line)
            ustar, kx, ky = adcpy.util.calcKxKy(t.velocity[:,:,0],t.velocity[:,:,1],
                                     dd,t.bin_center_elevation,t.bt_depth)
                                     
            if t.rotation_angle is not None:
                r_angle = t.rotation_angle*180.0/np.pi
            else:
                r_angle = 0.0
                    
            if flow_write_timeseries_flows_to_csv:
                times = t.date_time_str(filter_missing=True)
                try:
                    w_mode_str = '%s'%t.raw_adcp.config.prof_mode
                    bm = t.raw_adcp.bt_mode.tolist()
                    b_mode = [];
                    for bm1 in bm:
                        bmi = int(bm1)
                        if (bmi not in b_mode):
                            b_mode.append(bmi)   
                    b_mode_str = ''
                    for bm1 in b_mode:                           
                        if (b_mode_str == ''):
                            b_mode_str = '%i'%bm1                               
                        else:
                            b_mode_str = '%s/%i'%(b_mode_str,bm1)
                except:
                    w_mode_str = 'Unknown'
                    b_mode_str = 'Unknown'
                logfile.writerow((t.source,
                                 times[0],
                                 times[-1],
                                 w_mode_str,
                                 b_mode_str,
                                 '%7.4f'%U,
                                 '%7.4f'%V,
                                 '%10.2f'%Uflow,
                                 '%10.2f'%Vflow,
                                 '%10.2f'%survey_area,
                                 '%5.2f'%r_angle,
                                 t.history))
        
                if flow_plot_timeseries_values:
                    mtimes.append(sp.nanmedian(t.mtime))
                    mean_u.append(U)
                    mean_v.append(V)
                    total_flow.append(Uflow)
                    align_angle.append(r_angle)
                    ustars.append(ustar)
                    kxs.append(kx)
                    kys.append(ky)

    # plot timeseries data after all files have been processed
    if flow_plot_timeseries_values and data_file_num>0:

        # plot timeseries figures
        fig_handle = plt.figure()
        plt.subplot(311)
        align_angle= np.array(align_angle)
        mtimes = np.array(mtimes)
        mean_u = np.array(mean_u)
        mean_v = np.array(mean_v)
        total_flow = np.array(total_flow)
        aa = -align_angle*np.pi/180.0
        uq = np.cos(aa)*mean_u + np.sin(aa)*mean_v
        vq = -np.sin(aa)*mean_u + np.cos(aa)*mean_v
        v_mag = np.sqrt(uq**2 + vq**2)
        vScale = np.max(v_mag)
        vScale = max(vScale,0.126)
        qk_value = np.round(vScale*4)/4
        Q = plt.quiver(mtimes,np.zeros(len(mtimes)),uq,vq,
                       width=0.003,
                       headlength=10,
                       headwidth=7,
                       scale = 10*vScale,   #scale = 0.005,
                       scale_units = 'width'
                       )
        qk = plt.quiverkey(Q, 0.5, 0.85, qk_value, 
                       r'%3.2f '%qk_value + r'$ \frac{m}{s}$', labelpos='W',
                       )
        plt.title('Time series data: %s'%outpath)
        ax = plt.gca()
        ax.xaxis_date()
        plt.gcf().autofmt_xdate()
        ax.yaxis.set_visible(False)
        ax.set_xticklabels([])
        plt.subplot(312)
        plt.plot(mtimes,total_flow)
        plt.ylabel('m^3/s')
        ax = plt.gca()
        ax.xaxis_date()
        plt.gcf().autofmt_xdate()
        ax.set_xticklabels([])
        plt.subplot(313)
        plt.plot(mtimes,align_angle,'bo')
        plt.ylabel('rotation angle')
        ax = plt.gca()
        ax.xaxis_date()
        plt.gcf().autofmt_xdate()
        ts_plot = os.path.join(outpath,'time_series_plots.png')
        fig_handle.savefig(ts_plot)

        fig_handle = plt.figure(1111)
        plt.subplot(311)
        plt.plot(mtimes,ustars)
        plt.ylabel('u* m/s')
        ax = plt.gca()
        ax.xaxis_date()
        #plt.gcf().autofmt_xdate()
        ax.set_xticklabels([])
        plt.title('Dispersion Coefficients: %s'%outpath)
        plt.subplot(312)
        plt.plot(mtimes,kxs)
        plt.ylabel('Kx m^2/s')
        ax = plt.gca()
        ax.xaxis_date()
        #plt.gcf().autofmt_xdate()
        ax.set_xticklabels([])
        plt.subplot(313)
        plt.plot(mtimes,kys,'b')
        plt.ylabel('Ky m^2/s')
        ax = plt.gca()
        ax.xaxis_date()
        plt.gcf().autofmt_xdate()
        ts_plot = os.path.join(outpath,'time_series_dispersion.png')
        fig_handle.savefig(ts_plot)
        plt.close('all')
    
    if flow_write_timeseries_flows_to_csv:
        lf.close()
        
    print 'transect_flow_estimator completed!'
Example #17
def process_file(filename,outfile,sg07,options):
    input = open(filename,'rt')
    fwtS = open(outfile,'wb')
    region = options.ex/2
    posS = ""
    negS = ""
    posnoS = ""
    negnoS = ""
    allS   = ""
    countS = 0
    countnoS = 0
    tagS = []
    tagnoS = []
    std = []
    for line in input:
        if line.startswith("#"):
            fwtS.write(line)
            continue
        if(len(line.split("\t")) == 9):
            chrom,junk,junk,start,end,tag,strand,junk,attr = line.split("\t")
            
            if float(get_std(attr.rstrip())) == 0:
                tagS.append(int(tag))
                if strand == "+":
                    posS = posS+line
                else:
                    negS = negS+line
                    
            else:
                countnoS +=1
                std.append(float(get_std(attr.rstrip())))
                tagnoS.append(int(tag))
                allS = allS+line
                if strand == "+":
                    posnoS = posnoS+line
                else:
                    negnoS = negnoS+line
                    
    #posS_int_negnoS = pybedtools.BedTool(posS,from_string=True).slop(g=sg07,l=10,r=10).intersect(pybedtools.BedTool(negnoS,from_string=True),u=True) # add half the exclusion zone to start and end coordinate
    #negS_int_posnoS = pybedtools.BedTool(negS,from_string=True).slop(g=sg07,l=10,r=10).intersect(pybedtools.BedTool(posnoS,from_string=True),u=True)
    try:
    
        posS_int_negnoS = pybedtools.BedTool(posS,from_string=True).intersect(pybedtools.BedTool(negnoS,from_string=True).slop(g=sg07,l=region,r=region),u=True) # add half the exclusion zone to start and end coordinate
        negS_int_posnoS = pybedtools.BedTool(negS,from_string=True).intersect(pybedtools.BedTool(posnoS,from_string=True).slop(g=sg07,l=region,r=region),u=True)
        fullFile = allS+str(posS_int_negnoS)+str(negS_int_posnoS) # merging singletons and non singletons together
        countS = posS_int_negnoS.count() + negS_int_posnoS.count() # Counting all the singletons
        fwtS.write(str(pybedtools.BedTool(fullFile,from_string=True).sort()))
    except pybedtools.helpers.BEDToolsError:
        # if the peak call file had no singletons in it.
        print "The file had no singletons"
        countS = 0
        fullFile = allS
        fwtS.write(str(pybedtools.BedTool(fullFile,from_string=True).sort()))
        
    ratio = signal2noise(tagnoS,tagS) # calculating the signal to noise ratio.   
    allInfo[os.path.basename(filename)] = ".\t.\t"+str(countnoS)+"\t"+str(countS)+"\t"+str(st.nanmedian(tagnoS))+"\t"+str(st.nanmean(tagnoS))+"\t"+str(st.nanmedian(std))+"\t"+str(st.nanmean(std))+"\t"+str(ratio)
Example #18
def vpinfo(CL,param,base='bg'):
    """ This method returns information on the chosen parameter from CloudObj.dttl in the cloud for all vertical scan. The averaging of the parameter is done in the particular column of the vertical scan.
        Options:
            param: string containing the title of the parameter as found in CloudObj.dttl or CloudObj.extrattl
            base: method to find the cloud base and top. Default is best guess (defBGheight) base='bg'; to use the 4-point method (defheight) base='4point'.
        Returns H:
        H["bottom"]: parameter at the cloud base
        H["top"]: parameter at the cloud top
        H["mean"]: mean parameter through the cloud
        H["median"]: median parameter through the cloud
        H["minimum"]: minimum parameter through the cloud
        H["maximum"]: maximum parameter through the cloud
        H["stdev"]: standard deviation of the parameter through the cloud
        H["delta"]: difference of parameter between the bottom and the top
        H["slope"]: delta divided by the mean thickness
        H["units"]: units of the parameter """
    if not isinstance(param,str): param=str(param)
    H=dict()
    altp=[i for i,x in enumerate(CL.dttl) if x == 'altitude'][0]
    tim=[i for i,x in enumerate(CL.dttl) if x == 'time'][0]
    T=[i for i,x in enumerate(CL.dttl) if x.lower() == param.lower()]
    if len(T)==1: 
        T=T[0]
        Td=CL.data[T]
        Tunits=CL.dunit[T]
        alt=CL.data[altp]
        ta=CL.data[tim]
    elif len(T)>1: print("[vpinfo] Parameter %s was found multiple times in the basic data." %(param)); return dict()
    elif len(T)==0:
        posx=[] 
        for i,ttl in enumerate(CL.extrattl):     # for all extra datasets available
            posx=posx+[[i,j] for j,x in enumerate(ttl) if x.lower() == param.lower()]    # collect all titles matching the parameter
        if len(posx)==1: 
            Td=CL.extradata[posx[0][0]][posx[0][1]]    # loading the data
            Tunits=CL.extraunit[posx[0][0]][posx[0][1]]
            j=[j for j,x in enumerate(CL.extrattl[posx[0][0]]) if x.lower() == 'time'][0]
            Tt=CL.extradata[posx[0][0]][j]     # loading associated time stamp
            # adapting for too short data for interpolation
            if len(Tt)<2: Td=np.ones((2,))*NaN; Tt=np.array([CL.times["cloud"][0][0],CL.times["cloud"][0][1]]);
            # adapting the time vector to a common time vector
            ta1=np.max([CL.data[tim][0],Tt[0]]); ta2=np.min([CL.data[tim][-1],Tt[-1]]);
            ta=CL.data[tim][nonzero((CL.data[tim]>=ta1)*(CL.data[tim]<=ta2))[0]]
            alt=CL.data[altp][nonzero((CL.data[tim]>=ta1)*(CL.data[tim]<=ta2))[0]]
            fT=interpolate.interp1d(Tt,Td,kind='linear')
            Td=fT(ta)
        else: print("[vpinfo] No or multiple %s found in the basic or the extra data." %(param)); return dict()
    
    H["bottom"]=list(); H["top"]=list(); H["mean"]=list(); H["median"]=list(); H["stdev"]=list(); H["delta"]=list(); H["slope"]=list(); H["units"]=list(); H["minimum"]=list();  H["maximum"]=list();
    try:
        for i in range(len(CL.times["verticloud"])):
            if base=='4point': cb=CL.props["height"][i][1]; ct=CL.props["height"][i][2];
            else: cb=CL.props["BGheight"][i][0]; ct=CL.props["BGheight"][i][1];
            ix=nonzero((alt>=cb)*(alt<=ct)*(ta>=CL.times["verticloud"][i][0])*(ta<=CL.times["verticloud"][i][1]))[0]
            if len(ix)==0:
                H["mean"].append(nan); H["median"].append(nan); H["stdev"].append(nan); H["minimum"].append(nan); H["maximum"].append(nan); H["top"].append(nan); H["bottom"].append(nan); H["delta"].append(nan); H["slope"].append(nan); H["units"].append(nan)
            else:
                H["mean"].append(float(st.nanmean(Td[ix])))
                H["median"].append(float(st.nanmedian(Td[ix])))
                H["stdev"].append(float(st.nanstd(Td[ix])))
                H["minimum"].append(float(np.nanmin(Td[ix])))
                H["maximum"].append(float(np.nanmax(Td[ix])))
                if base=='4point': 
                    if len(nonzero((alt>=ct)*(alt<=CL.props["height"][i][3])*(ta>=CL.times["verticloud"][i][0])*(ta<=CL.times["verticloud"][i][1]))[0])==0: H["top"].append(nan)
                    else: H["top"].append(float(st.nanmedian(Td[nonzero((alt>=ct)*(alt<=CL.props["height"][i][3])*(ta>=CL.times["verticloud"][i][0])*(ta<=CL.times["verticloud"][i][1]))])))
                    if len(nonzero((alt>=CL.props["height"][i][0])*(alt<=cb)*(ta>=CL.times["verticloud"][i][0])*(ta<=CL.times["verticloud"][i][1]))[0])==0: H["bottom"].append(nan)
                    else: H["bottom"].append(float(st.nanmedian(Td[nonzero((alt>=CL.props["height"][i][0])*(alt<=cb)*(ta>=CL.times["verticloud"][i][0])*(ta<=CL.times["verticloud"][i][1]))])))
                    H["delta"].append(H["bottom"][i]-H["top"][i])
                    H["slope"].append(H["delta"][i]/(np.mean([ct, CL.props["height"][i][3]])-np.mean([CL.props["height"][i][0], cb])))
                else: 
                    R=10     # plus/minus R meters around the cloud top
                    if len(nonzero((alt>=ct-R)*(alt<=ct+R)*(ta>=CL.times["verticloud"][i][0])*(ta<=CL.times["verticloud"][i][1]))[0])==0: H["top"].append(nan)
                    else: H["top"].append(float(st.nanmedian(Td[nonzero((alt>=ct-R)*(alt<=ct+R)*(ta>=CL.times["verticloud"][i][0])*(ta<=CL.times["verticloud"][i][1]))])))
                    if len(nonzero((alt>=cb-R)*(alt<=cb+R)*(ta>=CL.times["verticloud"][i][0])*(ta<=CL.times["verticloud"][i][1]))[0])==0: H["bottom"].append(nan)
                    else: H["bottom"].append(float(st.nanmedian(Td[nonzero((alt>=cb-R)*(alt<=cb+R)*(ta>=CL.times["verticloud"][i][0])*(ta<=CL.times["verticloud"][i][1]))])))
                    H["delta"].append(H["bottom"][i]-H["top"][i])
                    H["slope"].append(float(H["delta"][i]/(ct-cb)))
                H["units"].append(Tunits)
            del ix
    except: 
        if base=='4point': print("[vpinfo] Height properties must be defined first using the defheight method.")
        else: print("[vpinfo] Height properties must be defined first using the defBGheight method.")
    return H     
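A hypothetical call (CL is assumed to be a CloudObj with heights defined via defheight):

H = vpinfo(CL, 'theta', base='4point')   # per-vertical-scan statistics
print(H["mean"], H["units"])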
Example #19
def process_file(fname,out_noS,out_onlyS):
    input = open(fname,'rt')
    fnoS = open(out_noS,'wb')
    fonlyS = open(out_onlyS,'wb')
    tags = []  # tags in non singletons
    Stags = [] # Tags in singletons
    std = []   
    singletonCount = 0
    for line in input:
        if line.startswith("#"):
            fnoS.write(line)
            continue
        if(len(line.split("\t")) == 9):
            chrom,junk,junk,start,end,tag,strand,junk,attr = line.split("\t")
            
            if float(get_std(attr.rstrip())) == 0:
                singletonCount+=1
                Stags.append(int(tag))
                fonlyS.write(line)
            else:
                tags.append(int(tag))
                std.append(float(get_std(attr.rstrip())))
                fnoS.write(line)
    ratio = signal2noise(tags,Stags)
    filedict[os.path.basename(fname)] = ".\t.\t"+str(len(tags))+"\t"+str(singletonCount)+"\t"+str(st.nanmedian(tags))+"\t"+str(st.nanmean(tags))+"\t"+str(st.nanmedian(std))+"\t"+str(st.nanmean(std))+"\t"+str(ratio)
Example #20
            fig = plt.figure(figsize=(3.62, 2.76))
            ax = fig.add_subplot(111)
            data = cdata[prnd, :]
            zenSpacing = 0.5
            median = []
            zen = np.linspace(0, 90, int(90. / zenSpacing) + 1)
            for z in zen:
                criterion = ((data[:, 2] <
                              (z + zenSpacing / 2.)) & (data[:, 2] >
                                                        (z - zenSpacing / 2.)))
                ind = np.array(np.where(criterion))[0]
                tmp = data[ind, :]
                rout = esm.reject_outliers_arg(tmp[:, 3], 3)
                for i in rout:
                    ax.plot(90. - z, tmp[i, 3], 'k.', alpha=0.5)
                median.append(nanmedian(data[ind, 3]))
            ax.plot(90. - zen, median, 'r-', alpha=0.5)

            for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
                         ax.get_xticklabels() + ax.get_yticklabels()):
                item.set_fontsize(8)

            ax.set_ylim([-35, 35])
            plt.tight_layout()
            plt.savefig(str(prn) + "_ele.png")
            plt.close()
            #================================================
            az = np.linspace(0, 360, 721)
            fig = plt.figure(figsize=(3.62, 2.76))

            ax = fig.add_subplot(111, polar=True)
Example #21
t1 = time.clock()
print "time to convert to numpy arrays:", (t1 - t0)

#####
# python array test
#####

t0 = time.clock()
#####
# numpy array test
#####
for test in natests:
    # find mean
    nameans.append(test.mean())
    # find median
    medianval = nanmedian(test)
    namedians.append(medianval)
    # normalise to median
    norm = test / medianval
    nanormalised.append(norm)
    # log2
    nalogged.append(numpy.log2(norm))
t1 = time.clock()
print "time for numpy test:", (t1 - t0)

#####
# validation
#####
try:
    assert (listsEqual(means, nameans))
except AssertionError: