Code example #1
File: icartt.py  Project: hagne/atm-py
def uhsas2sizedist(df):
    """
    Creates a size distribution time series instance from UHSAS data (as
    returned by the read_file function).

    Parameters
    ----------
    df : pandas.DataFrame
        As put out by the read_file function.

    Returns
    -------
    dist : SizeDist_TS
        Size distribution time series (numberConcentration).

    """
    ## make bins (based on what's mentioned in the header)
    bins = _np.linspace(40, 1000, 99)

    ## the size distribution data
    data = df.iloc[:, :-1].copy()
    data.columns = bins

    ### to my knowledge the uhsas can not measure below ~70 nm
    data_trunc = data.loc[:, 69:]

    # make the size distribution
    bined, _ = _db.bincenters2binsANDnames(data_trunc.columns.values)
    dist = _sd.SizeDist_TS(data_trunc, bined, 'numberConcentration')
    return dist
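
A minimal usage sketch for the function above; the file name is a placeholder and the call to read_file (the module's own reader mentioned in the docstring) is an assumption, not taken from the project:

# hypothetical usage; 'example_uhsas.ict' is a placeholder file name
df = read_file('example_uhsas.ict')   # pandas.DataFrame, as described in the docstring
dist = uhsas2sizedist(df)             # SizeDist_TS ('numberConcentration')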
Code example #2
File: _netCDF.py  Project: fanmei/atm-py
    def _concat(self, arm_data_objs, close_gaps=True):
        for att in self._concatable:
            first_object = getattr(arm_data_objs[0], att)
            which_type = type(first_object).__name__
            data_period = first_object._data_period
            if which_type == 'TimeSeries_2D':
                value = _timeseries.TimeSeries_2D(
                    _pd.concat([getattr(i, att).data for i in arm_data_objs]))
            elif which_type == 'TimeSeries':
                value = _timeseries.TimeSeries(
                    _pd.concat([getattr(i, att).data for i in arm_data_objs]))
            elif which_type == 'AMS_Timeseries_lev01':
                value = _AMS.AMS_Timeseries_lev01(
                    _pd.concat([getattr(i, att).data for i in arm_data_objs]))
            elif which_type == 'SizeDist_TS':
                # value = _AMS.AMS_Timeseries_lev01(pd.concat([getattr(i, att).data for i in arm_data_objs]))
                data = _pd.concat(
                    [getattr(i, att).data for i in arm_data_objs])
                value = _sizedistribution.SizeDist_TS(
                    data,
                    getattr(arm_data_objs[0], att).bins, 'dNdlogDp')
            elif which_type == 'TimeSeries_3D':
                value = _timeseries.TimeSeries_3D(
                    _pd.concat([getattr(i, att).data for i in arm_data_objs]))
            else:
                raise TypeError(
                    '%s is not an allowed type here (allowed: TimeSeries, TimeSeries_2D, '
                    'TimeSeries_3D, AMS_Timeseries_lev01, SizeDist_TS)' % which_type)

            value._data_period = data_period
            if close_gaps:
                setattr(self, att, value.close_gaps())
            else:
                setattr(self, att, value)
Code example #3
File: smps.py  Project: hagne/atm-py
def read_file(fn):
    out = {}
    df = pd.read_csv(fn)
    df.index = pd.to_datetime(df.DateTimeUTC)
    df.drop('DateTimeUTC', axis=1, inplace=True)
    # df.shape

    dist = df.loc[:, [i for i in df.columns
                      if i[:2] == 'Nn']].copy().astype(float)
    dist.columns = df.loc[:, [i for i in df.columns
                              if i[:2] == 'Ns']].iloc[0].astype(float) * 1000

    #     dist.index = pd.to_datetime(df.DateTimeUTC)

    dist = sd.SizeDist_TS(dist,
                          db.bincenters2binsANDnames(dist.columns.values)[0],
                          'dNdlogDp')
    dist = dist.convert2dNdlogDp()
    out['size_distribution'] = dist

    rest = df.drop([i for i in df.columns if i[:2] == 'Nn'], axis=1)
    rest = rest.drop([i for i in df.columns if i[:2] == 'Ns'], axis=1)
    #     rest = rest.rename({h['cpd3']: h['mylabel'] for h in header_dict}, axis = 1)
    out['rest'] = rest
    return out
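
A minimal usage sketch for the reader above; the csv path is a placeholder (assumption):

# hypothetical usage; 'smps_example.csv' is a placeholder file name
out = read_file('smps_example.csv')
dist = out['size_distribution']    # SizeDist_TS (dNdlogDp)
housekeeping = out['rest']         # remaining, non size-distribution columns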
Code example #4
def _read_csv(fname, norm2time=True, norm2flow=True):
    uhsas = _readFromFakeXLS(fname)
#     return uhsas
    sd, hk = _separate_sizedist_and_housekeep(uhsas, norm2time=norm2time, norm2flow=norm2flow)
    hk = timeseries.TimeSeries(hk)
#     return size_distribution,hk
    bins = _get_bins(sd)
#     return bins
    dist = sizedistribution.SizeDist_TS(sd, bins, "numberConcentration")
    return dist, hk
Code example #5
File: serial.py  Project: fanmei/atm-py
def read_radiosonde_csv(fname, cal):
    """reads a csv file and returns a TimeSeries

    Parameters
    ----------
    fname: str
        Name of file to be opend
    calibration: str or calibration instance
        Either pass the name of the file containing the calibration data, or a calibration instance.

    """

    df = pd.read_csv(fname, header=15)

    fkt = lambda x: x.lstrip(' ').replace(' ', '_')
    col_new = [fkt(i) for i in df.columns.values]
    df.columns = col_new

    time = df['date_[y-m-d_GMT]'] + df['time_[h:m:s_GMT]'] + '.' + df[
        'milliseconds'].astype(str)
    df.index = pd.Series(
        pd.to_datetime(time, format=time_tools.get_time_formate()))

    df[df == 99999.000] = np.nan

    alt = df['GPS_altitude_[km]'].copy()
    df['Altitude'] = alt * 1e3
    df.rename(columns={
        'GPS_latitude': 'Lat',
        'GPS_longitude': 'Lon'
    },
              inplace=True)

    bins = []
    for k in df.keys():
        if 'Bin' in k:
            bins.append(k)
    #         print(k)
#     print(bins)
    sd = df.loc[:, bins]

    hk = df.drop(bins, axis=1)

    hk = timeseries.TimeSeries(hk)
    hk.data.sort_index(inplace=True)
    hk.data.Altitude.interpolate(inplace=True)
    hk.data['temperature_K'] = hk.data[
        'iMet_air_temperature_(corrected)_[deg_C]'] + 273.15
    hk.data['pressure_Pa'] = hk.data['iMet_pressure_[mb]'] * 100
    #     fname_cal = '/Users/htelg/data/POPS_calibrations/150622_china_UAV.csv'
    cal = calibration.read_csv(cal)
    ib = cal.get_interface_bins(20)
    sd = sizedistribution.SizeDist_TS(
        sd, ib['binedges_v_int'].values.transpose()[0], 'numberConcentration')
    return sd, hk
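
A minimal usage sketch; both file names are placeholders. Note that, as written, the function always passes `cal` through calibration.read_csv, so a file path is the safe choice:

# hypothetical usage; file names are placeholders
sd, hk = read_radiosonde_csv('radiosonde_flight.csv', 'pops_calibration.csv')
# sd -> SizeDist_TS ('numberConcentration')
# hk -> TimeSeries with Altitude, temperature_K, pressure_Pa, ...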
Code example #6
File: _tdmasize.py  Project: fanmei/atm-py
    def _parse_netCDF(self):
        super(ArmDatasetSub, self)._parse_netCDF()

        df = pd.DataFrame(self._read_variable('number_concentration'),
                          index=self.time_stamps)

        d = self._read_variable('diameter')
        bins, colnames = diameter_binning.bincenters2binsANDnames(d[:] * 1000)

        self.size_distribution = sizedistribution.SizeDist_TS(
            df, bins, 'dNdlogDp')
        self.size_distribution._data_period = self._data_period
Code example #7
File: _netCDF.py  Project: hagne/atm-py
    def _concat(self, arm_data_objs, close_gaps=True):
        for att in self._concatable:
            first_object = getattr(arm_data_objs[0], att)
            which_type = type(first_object).__name__
            data_period = first_object._data_period
            if which_type == 'TimeSeries_2D':
                value = _timeseries.TimeSeries_2D(
                    _pd.concat([getattr(i, att).data for i in arm_data_objs]))
            elif which_type == 'TimeSeries':
                value = _timeseries.TimeSeries(
                    _pd.concat([getattr(i, att).data for i in arm_data_objs]))
            elif which_type == 'AMS_Timeseries_lev01':
                value = _AMS.AMS_Timeseries_lev01(
                    _pd.concat([getattr(i, att).data for i in arm_data_objs]))
            elif which_type == 'SizeDist_TS':
                # value = _AMS.AMS_Timeseries_lev01(pd.concat([getattr(i, att).data for i in arm_data_objs]))
                data = _pd.concat(
                    [getattr(i, att).data for i in arm_data_objs])
                value = _sizedistribution.SizeDist_TS(
                    data,
                    getattr(arm_data_objs[0], att).bins,
                    'dNdlogDp',
                    ignore_data_gap_error=True,
                )
            elif which_type == 'TimeSeries_3D':
                value = _timeseries.TimeSeries_3D(
                    _pd.concat([getattr(i, att).data for i in arm_data_objs]))
            else:
                raise TypeError(
                    '%s is not an allowed type here (allowed: TimeSeries, TimeSeries_2D, '
                    'TimeSeries_3D, AMS_Timeseries_lev01, SizeDist_TS)' % which_type)

            if hasattr(first_object, 'availability'):
                try:
                    avail_concat = _pd.concat([
                        getattr(i, att).availability.availability
                        for i in arm_data_objs
                    ])
                    avail = Data_Quality(None, avail_concat, None,
                                         first_object.flag_info)
                    value.availability = avail
                except Exception:
                    _warnings.warn(
                        'availability could not be concatenated; make sure you converted it to a pandas DataFrame at some point!'
                    )
            value._data_period = data_period
            if close_gaps:
                setattr(self, att, value.close_gaps())
            else:
                setattr(self, att, value)
Code example #8
File: aosuhsas.py  Project: hagne/atm-py
def read_netCDF(fname):
    # fname = '/Volumes/HTelg_4TB_Backup/arm_data/OLI/uhsas/oliaosuhsasM1.a1.20170401.000008.nc'

    if type(fname) == str:
        fname = [fname]

    sds = []
    for fn in fname:
        data = _xr.open_dataset(fn)

        if data.sampling_interval.split()[1] != 'seconds':
            raise ValueError('This should be seconds, but it is {}.'.format(
                data.sampling_interval.split()[1]))

        if not _np.all((data.upper_size_limit.data[:-1] -
                        data.lower_size_limit.data[1:]) == 0):
            raise ValueError('Something is wrong with the bins')

        # flow rate variable name changed at some point
        if 'sample_flow_rate' in data.variables.keys():
            flowrate = data.sample_flow_rate
        elif 'sampling_volume' in data.variables.keys():
            flowrate = data.sampling_volume
        else:
            raise KeyError('No flow rate variable found (expected '
                           'sample_flow_rate or sampling_volume).')

        if flowrate.units not in ['sccm', 'cc/min']:
            raise ValueError('units have to be sccm or cc/min, but are {}.'.format(
                flowrate.units))

        sd = data.size_distribution.to_pandas()

        # normalize total numbers to numbers/(cc)
        ## normalize to integration interval
        sd /= float(data.sampling_interval.split()[0])

        ## normalize to flow rate
        flowrate = flowrate.values / 60.
        sd = sd.divide(flowrate, axis=0)

        sds.append(sd)

    sd = _pd.concat(sds).sort_index()

    binedges = _np.append(data.lower_size_limit.data,
                          data.upper_size_limit.data[-1])
    sdts = _sizedist.SizeDist_TS(sd, binedges, 'numberConcentration')
    sdts._data_period = float(data.sampling_interval.split()[0])
    sdts = sdts.convert2dNdlogDp()
    return sdts
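
A minimal usage sketch; the path is a placeholder (the file name follows the commented example in the source). A list of paths is also accepted, in which case the individual distributions are concatenated and sorted by time:

# hypothetical usage; the path is a placeholder
sdts = read_netCDF('/path/to/oliaosuhsasM1.a1.20170401.000008.nc')
# sdts is a SizeDist_TS normalized to numbers per cc and converted to dNdlogDp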
Code example #9
File: _tdmaapssize.py  Project: hagne/atm-py
    def _parse_netCDF(self):
        super(ArmDatasetSub, self)._parse_netCDF()

        data = self._read_variable('number_concentration_DMA_APS')
        df = pd.DataFrame(data['data'], index=self.time_stamps)

        d = self._read_variable('diameter')['data']
        bins, colnames = diameter_binning.bincenters2binsANDnames(d[:] * 1000)

        self.size_distribution = sizedistribution.SizeDist_TS(
            df,
            bins,
            'dNdlogDp',
            ignore_data_gap_error=True,
            # fill_data_gaps_with = np.nan
        )
        self.size_distribution._data_period = self._data_period
        self.size_distribution.flag_info = self.flag_info
        availability = pd.DataFrame(data['availability'],
                                    index=self.time_stamps)
        self.size_distribution.availability = Data_Quality(
            self, availability, data['availability_type'], self.flag_info)
Code example #10
File: inversion.py  Project: hagne/atm-py
def extract_sizedistribution(df):
    #### get the size distribution data
    cols = df.columns
    cols = [i for i in cols if i.replace('.', '').isnumeric()]
    dist = df.loc[:, cols]
    if len(cols) == 0:
        return False

    # create bins for atmpy
    bins, _ = atmdb.bincenters2binsANDnames(np.array([float(i) for i in cols]))
    bins *= 2  #radius to diameter
    bins *= 1e3  # um to nm

    #### create sizedistribution instance
    #### todo: there is a scaling error since AERONET uses 'dVdlnDp' and I use 'dVdlogDp'
    dist_ts = atmsd.SizeDist_TS(
        dist,
        bins,
        'dVdlogDp',
        # fill_data_gaps_with=np.nan,
        ignore_data_gap_error=True,
    )

    return dist_ts
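
A minimal sketch with a tiny synthetic frame to illustrate what extract_sizedistribution expects: columns whose names are numeric strings are interpreted as bin-center radii in micrometers. The column values and data below are made up for illustration only:

# synthetic illustration only; columns (radii in um) and data are made up
import numpy as np
import pandas as pd

df = pd.DataFrame(
    np.random.rand(3, 4),
    index=pd.date_range('2020-01-01', periods=3, freq='h'),
    columns=['0.05', '0.065854', '0.086743', '0.114237'],
)
dist_ts = extract_sizedistribution(df)   # SizeDist_TS ('dVdlogDp'), or False if no numeric columns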
Code example #11
def process(ipmatchrow,
            folders,
            test=False,
            raise_error=True,
            path2product_file=None):
    # imet
    ####
    imet = open_iMet(folders['path2imet_folder'].joinpath(ipmatchrow.fn_imet))
    didit = set_altitude_column(imet, ipmatchrow.which_alt)

    if not didit:
        return False

    ## fill missing timestamps with nans
    imet = imet.resample('1s').mean()

    start_time, end_time = imet.index.min(), imet.index.max()

    # POPS
    ds = xr.open_dataset(
        folders['path2pops_folder'].joinpath(ipmatchrow.fn_pops + '.nc'))

    ## size distribution
    dist = ds.size_distributions.to_pandas()
    dist = dist.resample('1s').mean()
    dist = dist.truncate(start_time, end_time)

    dist_ts = sd.SizeDist_TS(
        dist,
        size_distribution.diameter_binning.bincenters2binsANDnames(
            ds.bincenters.values)[0], 'dNdlogDp')
    dist_ts._data_period = 1

    particle_no_concentration = dist_ts.particle_number_concentration.data.copy()

    particle_mean_d = dist_ts.particle_mean_diameter.data.copy()

    ## housekeeping
    df = ds.housekeeping.to_pandas()
    df = df.Altitude
    df = df.resample('1s').mean()
    df = df.truncate(start_time, end_time)

    # merge
    tbs = imet.copy()
    tbs['pops_particle_number_concentration'] = particle_no_concentration
    tbs['pops_particle_mean_diameter'] = particle_mean_d
    tbs['test_POPS_altitude'] = df.copy()

    # met
    ## ground pressure
    tbs['atm_pressure_ground'] = load_met_files(start_time, end_time, folders)

    # retrievals
    ## potential temperature
    add_eqiv_potential_temp(tbs)

    # create xarray dataset
    dstbs = xr.Dataset(tbs)
    dstbs['pops_size_distribution'] = dist

    # additional retrievals that take the xarray dataset to work with
    try:
        add_cloud_base_distance_and_transit(dstbs)
    except Exception:
        if raise_error:
            print('Error in adding cloud base stuff: ', sys.exc_info())
        else:
            return dstbs
    # add_sectioning(dstbs, folders=folders, path2product_file = path2product_file)
    add_uhsas_stuff(dstbs, dist_ts, uhsas_folder=folders['path2uhsas'])
    try:
        add_mwr_products(dstbs, folders['path2mwr'])
    except Exception as e:
        txt = e.__str__()
        warnings.warn(txt)
    add_cloud_top(dstbs, folders['path2cloudtop'])

    if test:
        out = {}
        out['tbs'] = tbs
        out['start'] = start_time
        out['end'] = end_time
        return out
    else:
        return dstbs
Code example #12
    def _peak2Distribution(self,
                           bins=defaultBins,
                           distributionType='number',
                           differentialStyle=False):
        """Action required: clean up!
        Returns the particle size distribution normalized in various ways
        distributionType
        dNdDp, should be fixed to that, change to other types later once the distribution is created!
        old:
            \t calibration: this will create a intensity distribution instead of size distribution. bins should only be a number of bins which will be logaritmically spaced
            \t number:\t numbers only $\mu m^{-1}\, cm^{-3}$
            \t surface:\t surface area distribution, unit: $\mu m\, cm^{-3}$
            \t volume:\t  volume distribution, unit: $\mu m^{2}\, cm^{-3}$
        differentialStyle:\t     if False a raw histogram will be created, else:
            \t dNdDp: \t      distribution normalized to the bin width, bincenters are given by (Dn+Dn+1)/2
            \t dNdlogDp:\t    distribution normalized to the log of the bin width, bincenters are given by 10**((logDn+logDn+1)/2)
    
        """
        notMasked = np.where(self.data.Masked == 0)
        # too_big_condi = np.where(self.data.Masked == 2)

        unique = np.unique(self.data.index.values[notMasked])
        N = np.zeros((unique.shape[0], bins.shape[0] - 1))
        too_big = np.zeros(unique.shape[0])

        for e, i in enumerate(unique):
            condi = np.where(
                np.logical_and(self.data.Masked == 0,
                               self.data.index.values == i))
            if distributionType == 'calibration':
                process = self.data.Amplitude.values[condi]
            else:
                process = self.data.Diameter.values[condi]
            n, edg = np.histogram(process, bins=bins)
            N[e] = n
            too_big[e] = np.logical_and(self.data.Masked == 2,
                                        self.data.index.values == i).sum()

        N = N.astype(float)
        too_big = too_big.astype(float)

        deltaT = (unique[1:] - unique[:-1]) / np.timedelta64(1, 's')

        deltaT_sl = np.append(deltaT[0], deltaT)
        deltaT = np.repeat(np.array([deltaT_sl]), bins.shape[0] - 1, axis=0)
        N /= deltaT.transpose()
        too_big /= deltaT_sl.transpose()
        binwidth = edg[1:] - edg[:-1]

        if not differentialStyle:
            pass

        elif differentialStyle == 'dNdDp':
            N = N / binwidth
        else:
            raise ValueError('wrong type for argument "differentialStyle"')

        binstr = bins.astype(int).astype(str)
        cols = []
        for e, i in enumerate(binstr[:-1]):
            cols.append(i + '-' + binstr[e + 1])
        dataFrame = pd.DataFrame(N, columns=cols, index=unique)
        # too_big = pd.DataFrame(too_big, columns=['# too big'])
        too_big = _timeseries.TimeSeries(
            pd.DataFrame(too_big, columns=['# too big'], index=unique))
        if distributionType == 'calibration':
            return sizedistribution.SizeDist_TS(dataFrame, bins, 'calibration')
        else:
            dist = sizedistribution.SizeDist_TS(dataFrame, bins, 'dNdDp')
            dist = dist.convert2dNdlogDp()
            dist.particle_number_concentration_outside_range = too_big
            return dist
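
A short numeric sketch of the bin-width normalization (dNdDp) described in the docstring above; the numbers are synthetic and only for illustration:

# synthetic numbers; illustrates the dNdDp normalization used above
import numpy as np

bins = np.array([140., 200., 300., 500.])   # bin edges in nm
counts = np.array([12., 30., 9.])           # raw counts per bin (already divided by delta t)
binwidth = bins[1:] - bins[:-1]
dNdDp = counts / binwidth                   # -> array([0.2, 0.3, 0.045])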
Code example #13
def read_csv(fname):
    las = _readFromFakeXLS(fname)
    sd, hk = _separate_sizedist_and_housekeep(las)
    bins = _get_bins(sd)
    dist = sizedistribution.SizeDist_TS(sd, bins, "numberConcentration")
    return dist
Code example #14
def open_path(path,
              window=('2016-11-15', '2016-11-18'),
              average=None,
              verbose=True):
    """

    Parameters
    ----------
    path
    start_time
    end_time
    average: tuple [None]
        The purpose of this is to keep the memory usage low in case a lower reolution is required. E.g. (60, 's')

    Returns
    -------

    """
    def read_aosaps(file, verbose=False):
        ds = _xr.open_dataset(file)  # autoclose was removed from xarray's open_dataset
        data_dist = ds.N_TOF.to_pandas()
        data_dist = data_dist.iloc[:, :-1]
        # bincenters =
        bincenters = data_dist.columns.values * 1000
        #     dist = sd.SizeDist_TS(data_dist, bincenters, 'numberConcentration')
        binedges = _np.unique(
            ds.aerodynamic_diameter_bound.data.flatten())[1:] * 1000

        # normalize to sample flow rate
        sample_flow_rate_cc_s = (ds.total_flow_rate.to_pandas() -
                                 ds.sheath_flow_rate.to_pandas()) * 1000 / 60
        data_dist = data_dist.divide(sample_flow_rate_cc_s, axis='index')
        out = {}
        out['data_dist'] = data_dist
        out['bincenters'] = bincenters
        out['binedges'] = binedges
        if verbose:
            print(file)
            print('shapes: {}, {}'.format(data_dist.shape, bincenters.shape))
        return out

    # start_time, end_time  = window
    files = _tools.path2filelist(path=path, window=window, product='aosaps')
    if verbose:
        print('Opening {} files.'.format(len(files)))
        print(_tools.path2info(files[0]))
    data_dist = None
    binedges = None
    for file in files:
        out = read_aosaps(file)
        ddt = _ts.TimeSeries(out['data_dist'])
        if average:
            ddt = ddt.average_time(average)
        ddt = ddt.data
        if isinstance(data_dist, type(None)):
            data_dist = ddt
            binedges = out['binedges']
        else:
            # DataFrame.append was removed in pandas 2.x; concat achieves the same
            # (assumes pandas is imported as _pd in this module)
            data_dist = _pd.concat([data_dist, ddt], sort=True)
            # make sure bincenters did not change
            assert (_np.all(_np.equal(binedges, out['binedges'])))

    dist = _sd.SizeDist_TS(data_dist,
                           binedges,
                           'numberConcentration',
                           ignore_data_gap_error=True)
    return dist
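
A minimal usage sketch; the folder path is a placeholder. The average argument keeps memory usage down by time-averaging each file before concatenation:

# hypothetical usage; the path is a placeholder
dist = open_path('/path/to/aosaps_data',
                 window=('2016-11-15', '2016-11-18'),
                 average=(60, 's'))   # optional time averaging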
Code example #15
def read_file(
    path,
    version='BBB_02',
    pattern='HK',
    skip_histogram=False,
    size_bins=None,
    # calibration_file = None,
    ignore_colums=[],  #['Flow_Rate_ccps', 'LED_P_MON', 'AI_4', 'AI_5', 'AI_7', 'AI_8', 'AI_9', 'AI_10', 'AI_11', 'LED_P_Mon_Therm', 'AO_Flow', 'AO_LaserPower', 'No_Pts', 'ValidParts', 'writeTime', 'currMax'],
    verbose=False,
):
    """
    Parameters
    ----------
    path: string or list of strings.
        This can either be a file name, a list of file names, or a folder.
    pattern: str
        If a folder is given, this is the pattern by which housekeeping files
        will be identified.
    version: string ['BBB_02']
        BBB_02: Handix version, not sure since when. At least since 2022-06,
                but most likely way earlier ...
        BBB_01: Beagle bone (original)
        sbRio: sbRio
    size_bins: str or pathlib.Path
        Path to a file containing the bin edges (EDGES not CENTERS!!).
        Structure: currently one value per line.
    verbose: bool

    Returns
    -------
    dict with 'housekeeping' (POPSHouseKeeping instance) and
    'sizedistribution' (SizeDist_TS) entries.
    """

    # test_data_folder = os.listdir()
    # test_data_folder = '20150419_000_POPS_HK.csv'

    def read_sbRio(fname, skip_histogram=False, verbose=False):
        """Reads housekeeping file (test_data_folder; csv-format) returns a pandas data frame instance.
        """
        if verbose:
            print('reading %s' % fname)
        try:
            df = pd.read_csv(fname, on_bad_lines='warn')  # error_bad_lines was removed in pandas 2.x
        except ValueError:
            return False
            #    data = df.values
            #    dateString = test_data_folder.split('_')[0]
        dt = datetime.datetime.strptime('19700101',
                                        "%Y%m%d") - datetime.datetime.strptime(
                                            '19040101', "%Y%m%d")
        dts = dt.total_seconds()
        # todo: (low) what is that delta t for, looks fishy (Hagen)
        dtsPlus = datetime.timedelta(hours=0).total_seconds()
        # Time_s = data[:,0]
        # data = data[:,1:]
        df.index = pd.Series(pd.to_datetime(df.Time_s - dts - dtsPlus,
                                            unit='s'),
                             name='Time_UTC')
        # if 'P_Baro' in df.keys():
        #     df['barometric_pressure'] = df.P_Baro
        #     df.drop('P_Baro', 1, inplace=True)
        #     df['altitude'] = ct.p2h(df.barometric_pressure)
        return POPSHouseKeeping(df)

    def read_BBB(fname, skip_histogram=False, verbose=False):
        if verbose:
            print(f'read pops house keeping bbb file: {fname}')
        col_names = pd.read_csv(
            fname,
            sep=',',
            nrows=1,
            header=None,
            #             index_col=1,
            #             usecols=np.arange()
        ).values[0][:-1].astype(str)
        col_names = _np.char.strip(col_names)

        if skip_histogram:
            usecols = list(range(27))
        else:
            usecols = None
        data = pd.read_csv(
            fname,
            sep=',',
            skiprows=1,
            header=None,
            usecols=usecols
            #             index_col=1,
            #             usecols=np.arange()
        )

        data_hk = data.iloc[:, :27]
        data_hk.columns = col_names
        data_hk.index = pd.to_datetime(data_hk['DateTime'], unit='s')
        data_hk.drop('DateTime', axis=1, inplace=True)
        #     hk = atmPy.general.timeseries.TimeSeries(data_hk, sampling_period = 1)

        hk = POPSHouseKeeping(data_hk, sampling_period=1)
        hk.data['Barometric_pressure'] = hk.data['P']
        return hk

    def read_BBB_02(fname, skip_histogram=False, verbose=False):
        if verbose:
            print(f'read pops house keeping file: {fname}')

        if skip_histogram:
            usecols = list(range(27))
        else:
            usecols = None
        data = pd.read_csv(fname, sep=',', usecols=usecols)

        data.columns = [col.strip() for col in data.columns]
        data.index = pd.to_datetime(data['DateTime'], unit='s')
        data.drop('DateTime', axis=1, inplace=True)

        hk = POPSHouseKeeping(data, sampling_period=1)
        hk.data['Barometric_pressure'] = hk.data['P']
        return hk

    dist = f'Extraction of the sizedistribution is currently not implemented for the file_version {version}'

    #### assign version
    if version == 'sbRio':
        read = read_sbRio
    elif version == 'BBB_01':
        read = read_BBB
    elif version == 'BBB_02':
        read = read_BBB_02
    else:
        raise ValueError('Housekeeping version {} is unknown!'.format(version))

    #### workplan
    path = pl.Path(path)
    if path.is_dir():
        file_paths = sorted(list(path.glob('*{}*'.format(pattern))))
    elif path.is_file():
        file_paths = [path]
    elif type(path) == list:
        file_paths = path
    else:
        raise TypeError('fname is of unknown type: {}'.format(
            type(path).__name__))

    file_paths.sort()

    #### read files
    hk_data = []
    for file in file_paths:

        hktmp = read(file, skip_histogram=skip_histogram, verbose=verbose)
        hk_data.append(hktmp.data)

    data = pd.concat(hk_data)

    #### generate POPSHouseKeeping instance and condition data
    hk = POPSHouseKeeping(data)
    hk.data = hk.data.dropna(
        how='all')  # this is necessary to avoid errors in further processing

    if ('P_Baro' in hk.data.keys()) or ('P_Ambient' in hk.data.keys()):
        if 'P_Baro' in hk.data.keys():
            hk.data['Barometric_pressure'] = hk.data.P_Baro
            hk.data.drop('P_Baro', axis=1, inplace=True)
        if 'P_Ambient' in hk.data.keys():
            hk.data['Barometric_pressure'] = hk.data.P_Ambient
            hk.data.drop('P_Ambient', axis=1, inplace=True)
            # try:
            # hk.data['Altitude'] = ct.p2h(hk.data.barometric_pressure)

    if ignore_colums:
        hk.data = hk.data.drop(ignore_colums, axis=1)

    #### separate housekeeping and sizedistribution
    if version == 'BBB_02':
        data = hk.data
        hist_cols = [
            col for col in data.columns
            if (col[0] == 'b' and col[1:].isnumeric())
        ]
        dist = data.loc[:, hist_cols]
        data.drop(hist_cols, axis=1, inplace=True)

        #### read size bin file
        fn = pl.Path(size_bins)
        with open(fn, 'r') as rein:
            lines = rein.readlines()
        bins = _np.array([float(l) for l in lines])

        #### generate size distribution timeseries instance
        dist = atmsd.SizeDist_TS(dist, bins, 'numberConcentration')

        dist.housekeeping = hk
    return {'housekeeping': hk, 'sizedistribution': dist}
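
A minimal usage sketch; the paths are placeholders. For version 'BBB_02' a bin-edges file (one edge per line) is needed to build the size distribution:

# hypothetical usage; paths are placeholders
out = read_file('/path/to/pops_hk_folder',
                version='BBB_02',
                pattern='HK',
                size_bins='/path/to/pops_bin_edges.txt')
hk = out['housekeeping']          # POPSHouseKeeping instance
dist = out['sizedistribution']    # SizeDist_TS ('numberConcentration')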
Code example #16
File: current.py  Project: hagne/ASR_NSA_science
def process(ipmatchrow,
            folders,
            test=False,
            raise_error=True,
            path2product_file=None,
            log={}):
    # imet
    ####
    imet = open_iMet(folders['path2imet_folder'].joinpath(ipmatchrow.fn_imet))

    didit = set_altitude_column(imet, ipmatchrow.which_alt, log=log)
    if not didit:
        return False

    ## fill missing timestamps with nans
    imet = imet.resample('1s').mean()

    start_time, end_time = imet.index.min(), imet.index.max()

    # POPS
    logentry = {'success': True}
    logentry['problems'] = []
    log['open_pops_file'] = logentry
    fn_pops = folders['path2pops_folder'].joinpath(ipmatchrow.fn_pops + '.nc')
    if 'sn00' in fn_pops.name:
        fn_pops_old = fn_pops
        fn_pops = fn_pops.parent.joinpath(
            fn_pops.name.replace('sn00', f'sn{ipmatchrow.popssn}'))
        warnings.warn(
            f'replaced sn00 with sn{ipmatchrow.popssn}\n{fn_pops_old} -> \n{fn_pops}'
        )
    if not fn_pops.is_file():
        logentry['success'] = False
        txt = f'POPS file not found ({fn_pops})'
        logentry['problems'].append(txt)
        warnings.warn(txt)
        return False
    ds = xr.open_dataset(fn_pops)

    ## size distribution
    dist = ds.size_distributions.to_pandas()
    dist = dist.resample('1s').mean()
    dist = dist.truncate(start_time, end_time)

    dist_ts = sd.SizeDist_TS(
        dist,
        size_distribution.diameter_binning.bincenters2binsANDnames(
            ds.bincenters.values)[0], 'dNdlogDp')
    dist_ts._data_period = 1

    particle_no_concentration = dist_ts.particle_number_concentration.data.copy()

    particle_mean_d = dist_ts.particle_mean_diameter.data.copy()

    ## housekeeping
    df = ds.housekeeping.to_pandas()
    df = df.Altitude
    df = df.resample('1s').mean()
    df = df.truncate(start_time, end_time)

    # merge
    tbs = imet.copy()
    tbs['pops_particle_number_concentration'] = particle_no_concentration
    tbs['pops_particle_mean_diameter'] = particle_mean_d
    tbs['test_POPS_altitude'] = df.copy()

    # met
    met = load_met_files(start_time, end_time, folders)
    ## ground pressure
    tbs['ground_atm_pressure'] = met['press']
    tbs['ground_precip_rate'] = met['precip']

    # retrievals
    ## potential temperature
    add_eqiv_potential_temp(tbs)

    # create xarray dataset
    dstbs = xr.Dataset(tbs)
    dstbs['pops_size_distribution'] = dist

    # additional retrievals that take the xarray dataset to work with
    # try:
    add_cloud_base_distance_and_transit(dstbs, log=log)
    # except:
    #     if raise_error:
    #         print('Error in adding adding cloud base stuff: ', sys.exc_info())
    #     else:
    #         return dstbs
    # add_sectioning(dstbs, folders=folders, path2product_file = path2product_file)
    add_uhsas_stuff(dstbs, dist_ts, uhsas_folder=folders['path2uhsas'])
    # try:
    add_mwr_products(dstbs, folders['path2mwr'], log=log)
    # except Exception as e:
    #     txt = e.__str__()
    # warnings.warn(txt)
    add_cloud_top(dstbs, folders['path2cloudtop'])
    ### rename a few variables
    change_list = {
        'temp': 'temperature',
        'rh': 'relative_humidity',
        'potential_temperature': 'temperature_potential',
        'equiv_potential_temperature': 'temperature_equiv_potential'
    }

    dstbs = dstbs.rename_vars(change_list)

    ### sort variables in the dataset ... could not find an attribute that does that?!?

    varlist = list(dstbs.variables)
    varlist.sort()

    # regenerate the dataset sorted and cleaned
    dst = xr.Dataset()
    for var in varlist:
        dst[var] = dstbs[var]
    dstbs = dst

    ### standardize towards CF?

    # standardize time

    dstbs = dstbs.rename_dims({"datetime": 'time'})
    dstbs = dstbs.rename_vars({'datetime': 'time'})
    dstbs.time.attrs['long_name'] = 'Time offset from base_time'
    dstbs['time_offset'] = dstbs.time.copy()

    # adding base_time according to arm or CF standard

    td = pd.to_datetime(dstbs.time.values[0]) - pd.to_datetime('1970')
    dstbs['base_time'] = int(td.total_seconds())
    dstbs.base_time.attrs['string'] = pd.to_datetime(
        dstbs.time.values[0]).__str__() + ' 0:00'
    dstbs.base_time.attrs['long_name'] = 'Base time in Epoch'
    dstbs.base_time.attrs['units'] = 'seconds since 1970-1-1 0:00:00 0:00'
    # test
    if test:
        out = {}
        out['tbs'] = tbs
        out['start'] = start_time
        out['end'] = end_time
        return out
    else:
        return dstbs