Example #1
def uhsas2sizedist(df):
    """
    Creates size distribution time series instance from uhsas data (as 
    returned by the read_file function)

    Parameters
    ----------
    df : pandas.DataFrame
        as put out by the read_file function.

    Returns
    -------
    dist : TYPE
        DESCRIPTION.

    """
    ## bin centers in nm (based on what is mentioned in the file header)
    bins = _np.linspace(40, 1000, 99)

    ## the size distribution data
    data = df.iloc[:, :-1].copy()
    data.columns = bins

    ### to my knowledge the UHSAS cannot measure below ~70 nm
    data_trunc = data.loc[:, 69:]

    # make the size distribution instance
    bin_edges, _ = _db.bincenters2binsANDnames(data_trunc.columns.values)
    dist = _sd.SizeDist_TS(data_trunc, bin_edges, 'numberConcentration')
    return dist
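A minimal usage sketch, assuming the underscore-prefixed aliases map onto the atm-py modules named in the later examples; the module paths, the file name, and the UHSAS read_file reader referenced in the docstring are all assumptions:

import numpy as _np
# module paths assumed from the hagne/atm-py project layout
from atmPy.aerosols.size_distribution import diameter_binning as _db
from atmPy.aerosols.size_distribution import sizedistribution as _sd

df = read_file('uhsas_export.csv')  # hypothetical UHSAS reader and file name
dist = uhsas2sizedist(df)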
Example #2
File: smps.py Project: hagne/atm-py
def read_file(fn):
    out = {}
    df = pd.read_csv(fn)
    df.index = pd.to_datetime(df.DateTimeUTC)
    df.drop('DateTimeUTC', axis=1, inplace=True)

    # the number concentration columns start with 'Nn'
    dist = df.loc[:, [i for i in df.columns
                      if i[:2] == 'Nn']].copy().astype(float)
    # the matching 'Ns' columns hold the bin center diameters (first row);
    # scaled by 1000, presumably um -> nm
    dist.columns = df.loc[:, [i for i in df.columns
                              if i[:2] == 'Ns']].iloc[0].astype(float) * 1000

    dist = sd.SizeDist_TS(dist,
                          db.bincenters2binsANDnames(dist.columns.values)[0],
                          'dNdlogDp')
    dist = dist.convert2dNdlogDp()
    out['size_distribution'] = dist

    # everything that is not part of the size distribution
    rest = df.drop([i for i in df.columns if i[:2] == 'Nn'], axis=1)
    rest = rest.drop([i for i in df.columns if i[:2] == 'Ns'], axis=1)
    out['rest'] = rest
    return out
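A short usage sketch for the dict this reader returns; the file name is hypothetical:

out = read_file('smps_export.csv')   # hypothetical file name
sizedist = out['size_distribution']  # SizeDist_TS in dNdlogDp
housekeeping = out['rest']           # all non-distribution columns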
Example #3
    def _parse_netCDF(self):
        super(ArmDatasetSub, self)._parse_netCDF()

        # number concentration time series from the netCDF variable
        df = pd.DataFrame(self._read_variable('number_concentration_DMA_APS'),
                          index=self.time_stamps)

        # bin center diameters, scaled um -> nm
        d = self._read_variable('diameter')
        bins, colnames = diameter_binning.bincenters2binsANDnames(d[:] * 1000)

        self.size_distribution = sizedistribution.SizeDist_TS(df, bins, 'dNdlogDp')
        self.size_distribution._data_period = self._data_period
Example #4
    def _parse_netCDF(self):
        super(ArmDatasetSub, self)._parse_netCDF()

        # number concentration time series from the netCDF variable
        df = pd.DataFrame(self._read_variable('number_concentration'),
                          index=self.time_stamps)

        # bin center diameters, scaled um -> nm
        d = self._read_variable('diameter')
        bins, colnames = diameter_binning.bincenters2binsANDnames(d[:] * 1000)

        self.size_distribution = sizedistribution.SizeDist_TS(
            df, bins, 'dNdlogDp')
        self.size_distribution._data_period = self._data_period
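Examples 3 and 4 (and most of the others) lean on the same helper contract: bincenters2binsANDnames takes an array of bin-center diameters and returns the bin edges together with matching column names, as the column assignment in Example 5 suggests. A minimal sketch of that contract; the center values are made up and the module path is an assumption:

import numpy as np
from atmPy.aerosols.size_distribution import diameter_binning

centers = np.array([100., 200., 400.])  # hypothetical bin centers in nm
bins, colnames = diameter_binning.bincenters2binsANDnames(centers)
# bins:     edges bracketing the centers, passed to SizeDist_TS
# colnames: one label per center, usable as data columns (see Example 5)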
Example #5
def load_PMEL_APS(fname):
    # the header labels are listed as NA values, presumably so repeated
    # header rows (e.g. from concatenated files) become NaN and are dropped
    na_values = ([u'StartDateTime']
                 + [u'Dp_%i' % i for i in range(1, 53)]
                 + [u'dNdlogDp_%i' % i for i in range(1, 53)])
    tab = pd.read_csv(fname, sep='\t', na_values=na_values)
    tab = tab.dropna()
    newIndex = pd.to_datetime(tab.StartDateTime.values)
    tab.index = newIndex
    reducedTab = tab.iloc[:, 53:]                  # the dNdlogDp columns
    bincenters = tab.iloc[0, 1:53].values * 1000   # Dp columns; um -> nm
    binedges, newColnames = diameter_binning.bincenters2binsANDnames(bincenters)
    reducedTab.columns = newColnames
    dist = sizedistribution.aerosolSizeDistribution(reducedTab, binedges, 'dNdlogDp')
    return dist
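Usage follows the same pattern as the other readers; the file name is hypothetical and the returned object is the aerosolSizeDistribution named in the source line above:

dist = load_PMEL_APS('pmel_aps_export.tsv')  # hypothetical tab-separated export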
Example #6
    def _parse_netCDF(self):
        super(ArmDatasetSub, self)._parse_netCDF()

        # number concentration time series plus availability flags
        data = self._read_variable('number_concentration_DMA_APS')
        df = pd.DataFrame(data['data'], index=self.time_stamps)

        # bin center diameters, scaled um -> nm
        d = self._read_variable('diameter')['data']
        bins, colnames = diameter_binning.bincenters2binsANDnames(d[:] * 1000)

        self.size_distribution = sizedistribution.SizeDist_TS(
            df,
            bins,
            'dNdlogDp',
            ignore_data_gap_error=True,
            # fill_data_gaps_with = np.nan
        )
        self.size_distribution._data_period = self._data_period
        self.size_distribution.flag_info = self.flag_info
        # attach the data-quality/availability flags to the size distribution
        availability = pd.DataFrame(data['availability'],
                                    index=self.time_stamps)
        self.size_distribution.availability = Data_Quality(
            self, availability, data['availability_type'], self.flag_info)
Example #7
def extract_sizedistribution(df):
    #### get the size distribution data
    # columns with purely numeric names hold the size distribution
    cols = df.columns
    cols = [i for i in cols if i.replace('.', '').isnumeric()]
    if len(cols) == 0:
        return False
    dist = df.loc[:, cols]

    # create bins for atmpy
    bins, _ = atmdb.bincenters2binsANDnames(np.array([float(i) for i in cols]))
    bins *= 2  # radius to diameter
    bins *= 1e3  # um to nm

    #### create sizedistribution instance
    #### todo: there is a scaling error since AERONET uses 'dVdlnDp' and I use 'dVdlogDp'
    dist_ts = atmsd.SizeDist_TS(
        dist,
        bins,
        'dVdlogDp',
        # fill_data_gaps_with=np.nan,
        ignore_data_gap_error=True,
    )

    return dist_ts
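The scaling issue flagged in the todo above is the usual natural-log vs. base-10 normalization: since dlnDp = ln(10) * dlog10Dp, a distribution reported per ln-interval converts as dV/dlog10Dp = ln(10) * dV/dlnDp. A sketch of the correction, applied to the raw column values before building the SizeDist_TS; whether this factor fully resolves the author's open question is not something the source confirms:

import numpy as np

# AERONET columns are dV/dlnDp; atmPy's 'dVdlogDp' is per log10 interval,
# so the values would be multiplied by ln(10) before constructing SizeDist_TS
dist = dist * np.log(10)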
Example #8
    def __init__(self, aod=0.1, diameter_range=[1e2, 2e4, 100]):
        """
        WARNING:
            I am pretty sure there is a normalization problem, since I
            substitute um with nm and r with d without properly normalizing.
            Also, the model is created for the natural logarithm while atmPy
            assumes a log_10! This requires a further normalization (see
            Seinfeld & Pandis). These things do not affect the general shape
            but will need to be addressed if absolute values are considered.
            
        Aerosol models used by the ABI aerosol optical depth retrieval. 
        From:
            GOES-R Advanced Baseline Imager (ABI) Algorithm Theoretical Basis
            Document For Suspended Matter/Aerosol Optical Depth and Aerosol 
            Size Parameter
            https://www.goes-r.gov/resources/docs.html
        The aerosol models of satellite retrievals do not strictly follow my
        model class, so this is not inheriting Model at this point ... maybe
        later?
        
        Parameters
        ----------
        aod : float
            Aerosol optical depth. The exact aerosol model depends on the aerosol optical depth.
        diameter_range : array-like, optional
            Diameter range, in nanometers, and the number of points the model is created for. The default is [1e2, 2e4, 100].

        Returns
        -------
        None.

        """
        models = pd.DataFrame([
            {
                'model': 'generic',
                'mode': 'fine',
                'rv': 0.145,
                'rv_scale': 0.0203,
                'sig': 0.3738,
                'sig_scale': 0.1365,
                'Cv': .1642,
                'Cv_scale': 0.7747,
                'n_r': 1.43,
                'n_i': 0.008,
                'n_scale': 0.002
            },
            {
                'model': 'generic',
                'mode': 'coarse',
                'rv': 3.1007,
                'rv_scale': 0.3364,
                'sig': 0.7292,
                'sig_scale': 0.098,
                'Cv': 0.1482,
                'Cv_scale': 0.6846,
                'n_r': 1.43,
                'n_i': 0.008,
                'n_scale': 0.002
            },
            {
                'model': 'urban',
                'mode': 'fine',
                'rv': 0.1604,
                'rv_scale': 0.434,
                'sig': 0.3642,
                'sig_scale': 0.1529,
                'Cv': 0.1718,
                'Cv_scale': 0.8213,
                'n_r': 1.42,
                'n_i': 0.007,
                'n_scale': 0.0015
            },
            {
                'model': 'urban',
                'mode': 'coarse',
                'rv': 3.3252,
                'rv_scale': 0.1411,
                'sig': 0.7595,
                'sig_scale': 0.1638,
                'Cv': 0.0934,
                'Cv_scale': 0.6394,
                'n_r': 1.42,
                'n_i': 0.007,
                'n_scale': 0.0015
            },
            {
                'model': 'smoke',
                'mode': 'fine',
                'rv': 0.1335,
                'rv_scale': 0.0096,
                'sig': 0.3834,
                'sig_scale': 0.0794,
                'Cv': 0.1748,
                'Cv_scale': 0.8914,
                'n_r': 1.51,
                'n_i': 0.02,
                'n_scale': 0
            },
            {
                'model': 'smoke',
                'mode': 'coarse',
                'rv': 3.4479,
                'rv_scale': 0.9489,
                'sig': 0.7433,
                'sig_scale': 0.0409,
                'Cv': 0.1043,
                'Cv_scale': 0.6824,
                'n_r': 1.51,
                'n_i': 0.02,
                'n_scale': 0
            },
            {
                'model': 'dust',
                'mode': 'fine',
                'rv': 0.1416,
                'rv_scale': -0.0519,
                'sig': 0.7561,
                'sig_scale': 0.148,
                'Cv': 0.087,
                'Cv_scale': 1.026,
                'n_r': 1.48,
                'n_i': 0.0025,
                'n_scale': (-0.021, 0.132)
            },
            {
                'model': 'dust',
                'mode': 'coarse',
                'rv': 2.2,
                'rv_scale': 0,
                'sig': 0.554,
                'sig_scale': -0.0519,
                'Cv': 0.6786,
                'Cv_scale': 1.0569,
                'n_r': 1.48,
                'n_i': 0.0025,
                'n_scale': (-0.021, 0.132)
            },
        ])
        self.model_parameters = models
        self.aod = aod

        # diameter range in nm -> radius in um
        r_range = np.array(diameter_range[:2]) / 2 / 1e3
        r = np.logspace(np.log10(r_range[0]), np.log10(r_range[1]),
                        diameter_range[2])

        # bin edges and column names from the radius grid (radius um -> diameter nm)
        bins, names = db.bincenters2binsANDnames(r * 2 * 1e3)
        dists = {}
        for mo in models.model.unique():
            mos = models[models.model == mo]

            # sum of the lognormal volume modes (fine + coarse) for this model
            dist = np.zeros(r.shape)
            for idx, row in mos.iterrows():
                # dust mode parameters scale as a power of AOD, others linearly
                if row.model == 'dust':
                    rv = row.rv * aod**row.rv_scale
                    sig = row.sig * aod**row.sig_scale
                else:
                    rv = row.rv + (row.rv_scale * aod)
                    sig = row.sig + (row.sig_scale * aod)

                Cv = row.Cv * aod**row.Cv_scale

                dist += Cv / (np.sqrt(2 * np.pi) * sig) * np.exp(
                    -(np.log(r) - np.log(rv))**2 / (2 * sig**2))
            dist = sd.SizeDist(pd.DataFrame([dist], columns=names), bins,
                               'dVdlogDp')
            dists[mo] = dist

        self.models = dists
        # also expose each model as an attribute (e.g. self.urban)
        for mo in dists:
            setattr(self, mo, dists[mo])
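The docstring's ln-vs-log10 warning can be checked numerically: each mode in the loop above is a lognormal in ln r that integrates to Cv over dln r, so reading it as a per-log10 density without the ln(10) factor misstates the volume. A quick sketch of the check, using the generic fine-mode parameters at aod = 0.1:

import numpy as np

aod = 0.1
rv = 0.145 + 0.0203 * aod          # generic fine mode, linear AOD scaling
sig = 0.3738 + 0.1365 * aod
Cv = 0.1642 * aod**0.7747

r = np.logspace(np.log10(0.01), np.log10(10), 1000)  # radius in um
dVdlnr = Cv / (np.sqrt(2 * np.pi) * sig) * np.exp(
    -(np.log(r) - np.log(rv))**2 / (2 * sig**2))

# integrating over ln r recovers Cv; over log10 r it is off by a factor ln(10)
print(np.trapz(dVdlnr, np.log(r)))    # ~= Cv
print(np.trapz(dVdlnr, np.log10(r)))  # ~= Cv / ln(10)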