예제 #1
0
def ratios():
    """Write the ethane and acetylene ratio text files.

    Reads ``ethaneFIT.txt``, ``acetyleneFIT.txt`` and ``methane.txt`` from a
    hard-coded data directory, reduces each to its ``DecYear``/``val``
    columns, keeps only rows from 2012 onwards without NaN, and then, for
    ethane and acetylene, builds a ratio against methane and writes it to
    ``ethaneRatio.txt`` / ``aceRatio.txt`` in the current directory.

    ``ratioCreator``, ``decToDatetime`` and ``noaaDateConv`` are not defined
    in this function and must be available in the enclosing module.
    """
    root = r'C:\Users\ARL\Desktop\J_Summit\analyses\Data'
    cols = ['DecYear', 'val', 'func', 'resid']  # column names

    def load(filename, colnames):
        # Read one whitespace-delimited file and return its cleaned
        # DecYear/val rows.  ``on_bad_lines='warn'`` skips malformed lines
        # with a warning (the behaviour of the removed
        # ``error_bad_lines=False``); it requires pandas >= 1.3.
        frame = pd.read_csv(root + filename,
                            sep=r'\s+',
                            on_bad_lines='warn',
                            header=None)
        frame.columns = colnames
        frame = frame[['DecYear', 'val']]  # drop misc cols
        # The filtered result has to be kept: rebinding a loop variable
        # would leave the caller's frame untouched.
        frame = frame[frame['DecYear'] >= 2012]  # remove pre 2012 vals
        frame = frame.dropna(axis=0, how='any')  # remove NaN rows
        # Restore a contiguous 0..n-1 index so that positional-style label
        # access in downstream helpers keeps working after filtering.
        return frame.reset_index(drop=True)

    # The layout of each file is stated explicitly instead of being guessed
    # from the first cell: the methane file carries an extra 'smooth' column.
    ethane = load(r'\ethaneFIT.txt', cols)
    ace = load(r'\acetyleneFIT.txt', cols)
    methane = load(r'\methane.txt', cols + ['smooth'])

    # create ratios
    tolerence = 3  # tolerence in hours
    # The name attribute is used below to build the output file name.
    ethane.name = 'ethane'
    ace.name = 'ace'
    for sheet in [ethane, ace]:
        ratiosheet, datesheet = ratioCreator(tolerence, sheet, methane)
        datesheet = decToDatetime(datesheet)
        df = pd.DataFrame(columns=['datetime', 'val'])
        df['datetime'], df['val'] = datesheet, ratiosheet
        df = noaaDateConv(df)
        df.to_csv(f'{sheet.name}Ratio.txt',
                  header=None,
                  index=None,
                  sep=' ',
                  mode='w+')
예제 #2
0
def methane():
    """Merge the earlier methane record with the 2018 and 2019 spreadsheets.

    The 2018/2019 sheets are reduced to their 'Decimal Year' and
    'Run median' columns (renamed to 'DecYear' and 'MR'), rows with NaN are
    dropped, and they are appended to the rows of the earlier record whose
    'DecYear' is below 2018.  Rows whose 'MR' z-score exceeds 5 in absolute
    value are removed, a 'datetime' column is added, and a two-column
    (datetime, MR) version passed through ``noaaDateConv`` is written to
    ``methane2019updated.txt``.

    ``loadExcel``, ``decToDatetime`` and ``noaaDateConv`` are not defined in
    this function and must be available in the enclosing module.

    Returns:
        The combined, outlier-trimmed DataFrame including the 'datetime'
        column.
    """
    # import original dataset and new datasets
    methanePrev = loadExcel(
        r"C:\Users\ARL\Desktop\Jashan\Summit\analyses\Data\Methane.xlsx")
    methane2018 = loadExcel(r'C:\Users\ARL\Desktop\SUM_CH4_insitu_2018.xlsx')
    methane2019 = loadExcel(
        r'C:\Users\ARL\Desktop\Summit_GC_2019\CH4_results\SUM_CH4_insitu_2019.xlsx'
    )

    # Select the wanted columns by name for each sheet.  This does not
    # depend on both sheets sharing the same extra columns, and the rename
    # is by label so the column order in the spreadsheet cannot swap names.
    goodcol = ['Decimal Year', 'Run median']  # good columns
    newnames = ['DecYear', 'MR']
    renames = dict(zip(goodcol, newnames))
    yearly = [
        sheet[goodcol].dropna(how='any', axis=0).rename(columns=renames)
        for sheet in (methane2018, methane2019)
    ]

    # keep only the earlier record up to (not including) 2018; later values
    # come from the yearly sheets
    methanePrev = methanePrev[methanePrev['DecYear'] < 2018]

    methaneFinal = pd.concat([methanePrev] + yearly)  # concat

    # trim extreme outliers
    # The z-score is computed over the non-NaN values only: a single NaN
    # would otherwise turn every score into NaN and disable the trimming.
    # Rows with NaN 'MR' are left in place, as before.
    values = methaneFinal['MR'].values
    thresh = 5
    valid = ~pd.isnull(values)
    outlier = np.zeros(len(values), dtype=bool)
    outlier[valid] = np.abs(stats.zscore(values[valid].astype(float))) > thresh
    # copy() so that adding the 'datetime' column below writes to an
    # independent frame rather than a slice of the concatenated one
    methaneFinal = methaneFinal[~outlier].copy()

    dates = decToDatetime(methaneFinal['DecYear'].values)  # conv to datetime
    methaneFinal['datetime'] = dates  # add to dataframe

    noaaMethane = pd.DataFrame(columns=['datetime', 'MR'])
    noaaMethane['datetime'], noaaMethane['MR'] = dates, methaneFinal[
        'MR'].values  # noaa version
    noaaMethane = noaaDateConv(noaaMethane)

    noaaMethane.to_csv('methane2019updated.txt',
                       header=None,
                       index=None,
                       sep=' ',
                       mode='w+')

    return methaneFinal
def nmhc():
    """Combine the NMHC records and write one text file per compound.

    Loads the earlier NMHC workbook plus the 2018 and 2019 ambient
    workbooks.  For each yearly workbook the rows labelled 70-93 are kept,
    three unnamed columns are dropped, the sheet is transposed so that each
    remaining original row becomes a column, and a 'datetime' column is
    derived from the 'Decimal Day of Year <year>' column.  The yearly data
    are appended to the pre-2018 part of the earlier record, and for every
    compound a (datetime, value) table is passed through ``noaaDateConv``
    and written to ``<compound>.txt`` in the current directory.

    Elapsed times since the start of the call are printed after each stage.

    ``loadExcel``, ``decToDatetime`` and ``noaaDateConv`` are not defined in
    this function and must be available in the enclosing module.
    """
    start = time.time()
    # import original data set and new datasets
    homedir = r'C:\Users\ARL\Desktop\Jashan\SummitWildfireTracers'
    root = os.path.join(homedir, 'Data')
    nmhcPrev = loadExcel(os.path.join(root, 'NMHC.xlsx'))
    nmhc2018 = loadExcel(r'C:\Users\ARL\Desktop\Ambient_2018_V2.xlsx')
    nmhc2019 = loadExcel(
        r'C:\Users\ARL\Desktop\Summit_GC_2019\NMHC_results\Ambient_2019.xlsx')

    rowstokeep = list(range(70, 94))  # labels of the mixing ratio rows
    dropcols = ['Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3']

    def reshape(sheet):
        # Keep the mixing ratio rows by label for this sheet.  Deriving the
        # rows to drop from another sheet's length breaks as soon as the two
        # workbooks differ in size.
        sheet = sheet[sheet.index.isin(rowstokeep)]
        # drop unnecessary columns, then rows that are empty apart from
        # their label in 'Unnamed: 0'
        sheet = sheet.drop(dropcols, axis=1)
        sheet = sheet.dropna(
            axis=0,
            how='all',
            subset=[x for x in sheet.columns if x not in ['Unnamed: 0']])
        # transpose, take the first row as the header, then drop that
        # header row and the last row
        sheet = sheet.T.reset_index()
        sheet.columns = list(sheet.loc[0])
        return sheet.drop([0, len(sheet) - 1], axis=0)

    nmhc2018, nmhc2019 = reshape(nmhc2018), reshape(nmhc2019)

    print('transposed in ', time.time() - start)

    # create the datetime column for each yearly dataframe and remove the
    # date columns that are no longer needed
    for yr in [nmhc2018, nmhc2019]:
        # the first four characters of the first entry in 'Unnamed: 0' are
        # used as the year, which selects the matching decimal-day column
        sampledate = yr['Unnamed: 0'][1]
        yearstr = str(sampledate)[:4]
        doycol = f'Decimal Day of Year {yearstr}'

        yr['datetime'] = [decToDatetime(x) for x in yr[doycol]]
        yr.drop(['Day', 'Hour', 'Minute', 'Unnamed: 0', doycol],
                axis=1,
                inplace=True)

    # create datetime column for past data and drop its old date columns
    nmhcPrev['datetime'] = [decToDatetime(x) for x in nmhcPrev['DecYear']]
    nmhcPrev.drop(['DecYear', 'DOY', 'Ignore'], axis=1, inplace=True)

    print('datetimes created in ', time.time() - start)

    # combine all datasets into one dataframe: the earlier record supplies
    # everything before 2018, the yearly sheets supply 2018 and 2019.
    # pd.concat is used because DataFrame.append was removed in pandas 2.0.
    nmhcPrev = nmhcPrev[nmhcPrev['datetime'] < dt.datetime(2018, 1, 1)]
    nmhcPrev = pd.concat([nmhcPrev, nmhc2018, nmhc2019])

    print('datasets combined in ', time.time() - start)

    # create textfiles for each NMHC
    compounds = [
        'ethane', 'ethene', 'propane', 'propene', 'i-butane', 'acetylene',
        'n-butane', 'i-pentane', 'n-pentane', 'hexane', 'Benzene', 'Toluene'
    ]

    for cpd in compounds:
        # datetime plus the specific compound, without NaN rows
        final = nmhcPrev[['datetime', cpd]]
        final = final.dropna(axis=0, how='any')
        # Keep samples after 2011-01-01 because of a gap in the earlier
        # data.  NOTE(review): the previous comment said "pre2012" while the
        # cut-off has always been 2011-01-01 -- confirm which is intended.
        final = final[final['datetime'] > dt.datetime(2011, 1, 1)]

        final = noaaDateConv(final)  # conv date formats

        final.to_csv(f'{cpd}.txt', header=None, index=None, sep=' ', mode='w+')

        print(f'{cpd} file written')

    print('All Files Done')
def ratios():
    """
    This function creates the ethane/methane and acetylene/methane ratios

    Reads ``ethaneFIT.txt``, ``acetyleneFIT.txt`` and ``methane.txt`` from a
    hard-coded data directory, reduces each to its ``DecYear``/``val``
    columns and keeps only rows from 2012 onwards without NaN.  Each
    compound is then paired with the nearest methane row in ``DecYear``
    within a 3 hour tolerance, the compound value is divided by the methane
    value, and the result is written to ``ethaneRatio_Aug.txt`` /
    ``aceRatio_Aug.txt`` in the current directory.

    ``decToDatetime`` and ``noaaDateConv`` are not defined in this function
    and must be available in the enclosing module.
    """
    root = r'C:\Users\ARL\Desktop\Jashan\Summit\analyses\Data'
    cols = ['DecYear', 'val', 'func', 'resid']  # column names

    def load(filename, colnames):
        # Read one whitespace-delimited file and return its cleaned
        # DecYear/val rows.  ``on_bad_lines='warn'`` skips malformed lines
        # with a warning (the behaviour of the removed
        # ``error_bad_lines=False``); it requires pandas >= 1.3.
        frame = pd.read_csv(root + filename,
                            sep=r'\s+',
                            on_bad_lines='warn',
                            header=None)
        frame.columns = colnames
        frame = frame[['DecYear', 'val']]  # drop misc cols
        # The filtered result has to be kept: rebinding a loop variable
        # would leave the caller's frame untouched, and merge_asof below
        # rejects NaN keys.
        frame = frame[frame['DecYear'] >= 2012]  # remove pre 2012 vals
        return frame.dropna(axis=0, how='any')  # remove NaN rows

    # The layout of each file is stated explicitly instead of being guessed
    # from the first cell: the methane file carries an extra 'smooth' column.
    sheets = {
        'ethane': load(r'\ethaneFIT.txt', cols),
        'ace': load(r'\acetyleneFIT.txt', cols),
    }
    methane = load(r'\methane.txt', cols + ['smooth'])

    # create ratios
    tolerance = 3  # tolerance in hours
    # convert tolerance to a fraction of a 365-day year, the unit of DecYear
    tolerance = ((tolerance / 24) / 365)

    for name, sheet in sheets.items():
        # pair every compound row with the nearest methane row in time;
        # rows without a partner inside the tolerance get NaN for methane
        combinedsheet = pd.merge_asof(sheet.sort_values('DecYear'),
                                      methane.sort_values('DecYear'),
                                      on='DecYear',
                                      tolerance=tolerance,
                                      direction='nearest')
        datesheet = decToDatetime(combinedsheet['DecYear'].tolist())
        combinedsheet['datetime'] = datesheet
        combinedsheet.columns = ['DecYear', f'{name}', 'methane', 'datetime']

        ratio = combinedsheet[f'{name}'] / combinedsheet['methane']
        combinedsheet.drop(['DecYear', f'{name}', 'methane'],
                           axis=1,
                           inplace=True)
        combinedsheet['ratio'] = ratio
        # removes the rows that found no methane partner
        combinedsheet.dropna(axis=0, inplace=True, how='any')

        df = noaaDateConv(combinedsheet)
        df.to_csv(f'{name}Ratio_Aug.txt',
                  header=None,
                  index=None,
                  sep=' ',
                  mode='w+')