Example #1
0
def picarroMetCombo(filename):
    """
    Combine a Picarro concentration file with the trimmed met data.

    The concentration file is read as header-less, whitespace-delimited text
    with two columns (decimal date, value). Each concentration row is matched
    to the met row nearest in time, within one hour.

    :param filename: path of the whitespace-delimited concentration file
    :return: dataframe of concentration rows joined with their nearest met row;
        rows with any missing value (including unmatched rows) are dropped
    """

    tolerance = pd.Timedelta('1 hour')

    met = metTrim()
    sheet = pd.read_csv(filename,
                        encoding='utf8',
                        header=None,
                        sep=r'\s+')
    sheet.columns = ['date', 'value']
    sheet['datetime'] = decToDatetime(sheet['date'].values)
    sheet = sheet.drop('date', axis=1)

    # merge_asof needs a sorted, NaN-free key on the left side
    sheet = sheet.dropna(axis=0, how='any').sort_values('datetime')

    # Met rows more than one tolerance before the first sample can never be
    # matched, so trim them with a boolean mask. (The mask was previously
    # passed to DataFrame.drop, which treats its values as row labels.)
    firstSample = sheet['datetime'].min()
    met = met[met['datetime'] >= firstSample - tolerance]
    met = met.drop(['steady'], axis=1)
    met = met.sort_values('datetime').reset_index(drop=True)

    # merge the met data onto the concentration data by finding the nearest
    # datetime within an hour
    picarro = pd.merge_asof(sheet,
                            met,
                            on='datetime',
                            direction='nearest',
                            tolerance=tolerance)
    picarro = picarro.dropna(axis=0, how='any')

    return picarro
Example #2
0
def ratios():
    """
    Create the ethane/methane and acetylene/methane ratio files.

    Reads the ethane, acetylene and methane fit files, keeps the decimal year
    and value columns from 2012 onward, builds each ratio with ratioCreator
    and writes it to '<name>Ratio.txt' in the current working directory.

    :return: nothing, the ratio files are written to disk
    """

    # import data sets
    root = r'C:\Users\ARL\Desktop\J_Summit\analyses\Data'
    ethane = pd.read_csv(root + r'\ethaneFIT.txt',
                         delim_whitespace=True,
                         error_bad_lines=False,
                         header=None)
    ace = pd.read_csv(root + r'\acetyleneFIT.txt',
                      delim_whitespace=True,
                      error_bad_lines=False,
                      header=None)
    methane = pd.read_csv(root + r'\methane.txt',
                          delim_whitespace=True,
                          error_bad_lines=False,
                          header=None)

    cols = ['DecYear', 'val', 'func', 'resid']  # column names

    def _clean(sheet, names):
        """Name the columns, keep DecYear/val, cut pre 2012 and NaN rows."""
        sheet.columns = names
        sheet = sheet[['DecYear', 'val']]
        sheet = sheet[sheet['DecYear'] >= 2012]  # remove pre 2012 vals
        sheet = sheet.dropna(axis=0, how='any')  # remove NaN rows
        return sheet.reset_index(drop=True)

    # cleaning up data; the results are reassigned because filtering returns
    # new frames (rebinding a loop variable would leave the originals as-is)
    ethane = _clean(ethane, cols)
    ace = _clean(ace, cols)
    methane = _clean(methane, cols + ['smooth'])  # methane has an extra col

    # create ratios
    tolerence = 3  # tolerence in hours
    ethane.name = 'ethane'
    ace.name = 'ace'
    for name, sheet in (('ethane', ethane), ('ace', ace)):
        ratiosheet, datesheet = ratioCreator(tolerence, sheet, methane)
        datesheet = decToDatetime(datesheet)
        df = pd.DataFrame(columns=['datetime', 'val'])
        df['datetime'], df['val'] = datesheet, ratiosheet
        df = noaaDateConv(df)
        df.to_csv(f'{name}Ratio.txt',
                  header=None,
                  index=None,
                  sep=' ',
                  mode='w+')
Example #3
0
def methane():
    """
    Combine the previous methane record with the 2018 and 2019 results.

    Values with an absolute z score above 5 are trimmed, a datetime column is
    added and a NOAA formatted copy is written to 'methane2019updated.txt'.

    :return: combined methane dataframe with 'DecYear', 'MR' and 'datetime'
        columns (plus any other columns carried by the previous record)
    """

    # import original dataset and new datasets
    methanePrev = loadExcel(
        r"C:\Users\ARL\Desktop\Jashan\Summit\analyses\Data\Methane.xlsx")
    methane2018 = loadExcel(r'C:\Users\ARL\Desktop\SUM_CH4_insitu_2018.xlsx')
    methane2019 = loadExcel(
        r'C:\Users\ARL\Desktop\Summit_GC_2019\CH4_results\SUM_CH4_insitu_2019.xlsx'
    )

    # identify column names we want to keep and their new names
    goodcol = ['Decimal Year', 'Run median']
    newnames = ['DecYear', 'MR']
    renames = dict(zip(goodcol, newnames))

    def _trim(sheet):
        """Keep the good columns, drop NaN rows and rename by column name."""
        # selecting by name works even if the two yearly sheets carry
        # different extra columns or a different column order
        sheet = sheet[goodcol].dropna(how='any', axis=0)
        return sheet.rename(columns=renames)

    methane2018 = _trim(methane2018)
    methane2019 = _trim(methane2019)

    # keep only the pre 2018 part of the previous record
    methanePrev = methanePrev[methanePrev['DecYear'] < 2018]

    comb = [methanePrev, methane2018, methane2019]  # create combination frame
    methaneFinal = pd.concat(comb)  # concat

    # trim extreme outliers
    values = methaneFinal['MR'].values
    z = np.abs(stats.zscore(values))
    thresh = 5
    # copy so the datetime column below is added to an independent frame
    methaneFinal = methaneFinal[~(z > thresh)].copy()

    dates = decToDatetime(methaneFinal['DecYear'].values)  # conv to datetime
    methaneFinal['datetime'] = dates  # add to dataframe

    noaaMethane = pd.DataFrame(columns=['datetime', 'MR'])
    noaaMethane['datetime'], noaaMethane['MR'] = dates, methaneFinal[
        'MR'].values  # noaa version
    noaaMethane = noaaDateConv(noaaMethane)

    noaaMethane.to_csv('methane2019updated.txt',
                       header=None,
                       index=None,
                       sep=' ',
                       mode='w+')

    return methaneFinal
Example #4
0
def fireTrack():
    """
    Plot satellite fire detections over a map image of Greenland.

    The acetylene ratio data is combined with the fire archive by fireCombo,
    the average z scores of the combined frame are printed, and the fire
    locations are drawn as a scatter plot coloured by brightness.

    :return: nothing, displays plot with plt.show()
    """

    # import alternate data
    dataroot = r'C:\Users\ARL\Desktop\Summit\analyses\Data'
    ace = readCsv(dataroot + '\\' + r'aceRatioNoaa.txt')

    # import fire data; the flag selects the archive file, the brightness
    # column used for colouring and the sensor name used in the title
    virrs = True
    fireroot = r'C:\Users\ARL\Desktop\FireData'
    if virrs:
        firefile, colorcol, sensor = (r'\fire_archive_V1_60132.csv',
                                      'bright_ti4', 'VIIRS')
    else:
        firefile, colorcol, sensor = (r'\fire_archive_M6_60131.csv',
                                      'brightness', 'MODIS')
    fire = pd.read_csv(fireroot + firefile)

    # data triming, reassign headers, add datetime column
    header = ['decyear', 'value', 'function', 'resid', 'residsmooth']
    ace.columns = header

    # copy so the new columns are added to an independent frame, not a slice
    ace = ace[ace['value'] >= 0.00000001].copy()

    ace['datetime'] = decToDatetime(ace['decyear'].values)
    ace['normResid'] = ace['resid'].values / ace['value'].values

    # combine fire and other dataset to produce master dataframe for analysis
    master = fireCombo(fire, ace, VIRRS=virrs)

    # identify average z score
    avg_vals = np.average(master['value_z'].values)
    avg_norms = np.average(master['normed_z'].values)

    print(f'The average z score in values is {avg_vals}')
    print(f'The average z score in normalized residuals is {avg_norms}')

    mybounds = {'x': (-73.2, -9.4), 'y': (57.8, 84.3)}

    # scatterplot mapping
    img = mpimg.imread(fireroot + r'\greenland.PNG')

    master.plot(kind='scatter',
                x='longitude',
                y='latitude',
                c=colorcol,
                cmap=plt.get_cmap('magma_r'),
                colorbar=True,
                figsize=(10, 7))

    # stretch the map image over the longitude / latitude bounds
    plt.imshow(img,
               extent=[
                   mybounds['x'][0], mybounds['x'][1], mybounds['y'][0],
                   mybounds['y'][1]
               ],
               alpha=0.5)

    plt.xlabel('Longitude', fontsize=14)
    plt.ylabel('Latitude', fontsize=14)
    plt.title(f'NASA {sensor} Fire Count Overlay on Greenland')
    plt.legend()
    plt.show()
Example #5
0
def ch4plot():
    """
    Plot the methane data with its fitted function and the fit residuals.

    Reads 'methane.txt', draws a two panel figure (values with fitted curve
    on top, residuals with fitted residual curve below) and saves it as
    'methane.png' in the Figures directory.

    :return: nothing, the figure is saved to disk
    """

    header = ['yr', 'value', 'function', 'resid',
              'residLine']  # dataframe headers
    root = r'C:\Users\ARL\Desktop\J_Summit\analyses\Data'
    filepath = root + '\\' + 'methane.txt'
    data = readCsv(filepath)
    data.columns = header

    register_matplotlib_converters()

    # convert the dec year col to datetime
    data['datetime'] = decToDatetime(data['yr'])
    data.drop('yr', axis=1, inplace=True)

    # y bounds, padded by a fraction of the mean (arbitrary vals look ok).
    # The padding uses the absolute mean: residuals can average below zero,
    # and a negative pad would shrink the axis and crop the extreme points.
    values = data['value']
    padV = abs(values.mean()) / 100
    lowV = values.min() - padV
    highV = values.max() + padV

    resid = data['resid']
    padR = abs(resid.mean()) / 3
    lowR = resid.min() - padR
    highR = resid.max() + padR

    # x bounds
    low = data['datetime'].min() - dt.timedelta(days=30)
    high = data['datetime'].max() + dt.timedelta(days=30)

    # plotting
    sns.set()  # setup
    f, ax = plt.subplots(nrows=2, figsize=(12, 8))  # two stacked panels
    sns.despine(f)
    plt.subplots_adjust(left=None,
                        bottom=None,
                        right=None,
                        top=None,
                        wspace=0.3,
                        hspace=0.5)
    top, bottom = ax[0], ax[1]

    # background data values with fitted harmonic functions
    sns.scatterplot(x='datetime',
                    y='value',
                    data=data,
                    ax=top,
                    alpha=0.7,
                    s=10,
                    legend='brief',
                    label='GC Data')
    sns.lineplot(x='datetime',
                 y='function',
                 data=data,
                 ax=top,
                 linewidth=2,
                 label='Fitted Curve')

    top.set_title('GC Methane Data with Fitted Function')
    top.set_xlabel('Date')
    top.set_ylabel('Mixing Ratio [ppb]')
    top.set(xlim=(low, high))
    top.set(ylim=(lowV, highV))
    top.get_lines()[0].set_color('#00b386')  # recolor the fitted curve
    top.legend()

    # residual data
    sns.scatterplot(x='datetime',
                    y='resid',
                    data=data,
                    ax=bottom,
                    alpha=1,
                    s=10,
                    legend='brief',
                    label='Residuals from Fit')
    sns.lineplot(x='datetime',
                 y='residLine',
                 data=data,
                 ax=bottom,
                 linewidth=2,
                 label='Fitted Residual Curve')
    bottom.set_title('GC Residuals from Fitted Function')
    bottom.set_xlabel('Date')
    bottom.set_ylabel('Mixing Ratio [ppb]')
    bottom.get_lines()[0].set_color('#00b386')  # recolor the residual curve
    bottom.legend()
    bottom.set(xlim=(low, high))
    bottom.set(ylim=(lowR, highR))

    # save the plots
    direc = r'C:\Users\ARL\Desktop\J_Summit\analyses\Figures' + '\\' + 'methane.png'
    f.savefig(direc, format='png')
def plotratios(hours, ethane=True, all=True, summer=True, viirs=True):
    """
    plotratios is a function that imports either the acetylene/methane ratio or the ethane/methane ratio data and
    plots it, various conditions can be set.

    :param hours: Number of back trajectory hours ran with Hysplit, used for plot titles
    :param ethane: Default True. Set to false for acetylene data.
    :param all: Default True, keeps every row with a residual z score above 0. Set to false to keep only
        rows with a residual z score above 3.
    :param summer: Default True, keeps days of year 120-305 (inclusive). Set to false to keep only the
        remaining days.
    :param viirs: Default True, passed through to trajPlot. Set to false to use MODIS C6 data

    :return: nothing, the plotting is done by trajPlot
    """

    # Create titles and set data path depending on options
    dataroot = r'C:\Users\ARL\Desktop\Jashan\Summit\analyses\Data'                     # data directory
    trajroot = r'C:\Users\ARL\Desktop\Jashan\Jashan PySplit\pysplitprocessor-master\pysplitprocessor\messeduptime_notUTC'
    if ethane:
        sheet = readCsv(dataroot + r'\ethaneRatioNoaa.txt')
        # NOTE(review): no trajectory directory was ever assigned for the ethane outlier case, which left
        # `root` undefined at the trajPlot call. The all-data ethane directory is used for both cases
        # here -- confirm whether a dedicated outlier directory exists.
        root = os.path.join(trajroot, 'ethane_methane_all')
        if all:
            title = f'{hours}h Back Trajectories of Ethane/Methane Ratio, 2012-2019'
        else:
            title = f'{hours}h Back Trajectories of Ethane/Methane Ratio Outliers, 2012-2019'
    else:
        sheet = readCsv(dataroot + r'\aceRatioNoaa.txt')
        if all:
            root = r'C:\Users\ARL\Desktop\Jashan\Jashan ' \
                   r'PySplit\pysplitprocessor-master\pysplitprocessor\aceTraj'
            title = f'{hours}h Back Trajectories of Acetylene/Methane Ratio, 2012-2018'
        else:
            root = r'C:\Users\ARL\Desktop\Jashan ' \
                   r'PySplit\pysplitprocessor-master\pysplitprocessor\ace_methane_traj_highz'
            title = f'{hours}h Back Trajectories of Acetylene/Methane Ratio Outliers, 2012-2019'

    sheet.columns = ['decyear', 'value', 'function', 'resid', 'residsmooth']   # assign column names
    # remove zero values; copy so the columns added below land on an independent frame
    sheet = sheet[sheet['value'] >= 0.000001].copy()

    sheet['datetime'] = decToDatetime(sheet['decyear'].values)                  # create datetimes from decyear
    sheet['datetime'] = sheet['datetime'] + pd.Timedelta('3 hours')             # convert tz to UTC

    # day of year for every datetime
    sheet['julian'] = [d.timetuple().tm_yday for d in sheet['datetime'].tolist()]

    cutoffs = (120, 305)
    inSummer = np.logical_and(sheet['julian'] >= cutoffs[0],
                              sheet['julian'] <= cutoffs[1])
    if summer:
        keep = inSummer                                                         # find just summer values
        print('-- Winter Data Removed')
    else:
        keep = ~inSummer                                                        # find just winter values
        print('-- Summer Data Removed')

    dropcols = ['decyear', 'function', 'residsmooth']                           # columns to drop
    sheet = sheet[keep].drop(dropcols, axis=1)                                  # filter, drop unused columns

    # remove slow data or data above 342, below 72 degrees at Summit camp due to possible pollution
    sheetClean = metRemove(sheet, 1, dropMet=True)

    residuals = sheetClean['resid'].values                                      # numpy array of resid
    z = np.abs(stats.zscore(residuals))                                         # calculate z scores
    sheetClean['zscores'] = z                                                   # assign as column
    thresh = 0 if all else 3                                                    # z score threshold
    sheetZ = sheetClean[z > thresh].reset_index(drop=True)                      # remove non outliers

    trajPlot(root, title=title, zscores=sheetZ, viirs=viirs, summer=summer)
Example #7
0
from metRemove import metRemove
from scipy import stats

# Script setup: load the acetylene/methane ratio data and keep only the rows
# whose day of year falls between the two cutoffs (inclusive).
hours = 72
title = f'{hours}h Back Trajectories of Acetylene/Methane Ratio Outliers, 2012-2018'
root = r'C:\Users\ARL\Desktop\Jashan PySplit\pysplitprocessor-master\pysplitprocessor\ace_methane_traj'

dataroot = r'C:\Users\ARL\Desktop\Summit\analyses\Data'  # data directory
ace = readCsv(dataroot + r'\aceRatioNoaa.txt')  # data read in acetylene

header = ['decyear', 'value', 'function', 'resid',
          'residsmooth']  # assign column names
ace.columns = header
# copy so the columns added below land on an independent frame, not a slice
ace = ace[ace['value'] >= 0.00000001].copy()

ace['datetime'] = decToDatetime(
    ace['decyear'].values)  # create datetimes from decyear

dates = ace['datetime'].tolist()  # put datetimes in a list
julian = [d.timetuple().tm_yday for d in dates]  # day of year per datetime
ace['julian'] = julian  # add to dataframe

cutoffs = (120, 305)  # identify julian cutoffs
keep = np.logical_and(
    ace['julian'] >= cutoffs[0],  # create boolean and array
    ace['julian'] <= cutoffs[1])
ace = ace[keep]  # boolean index to remove winter
def nmhc():
    """
    Combine the previous NMHC record with the 2018 and 2019 ambient results
    and write one NOAA formatted text file per compound.

    The yearly sheets are cut down to their mixing ratio rows, transposed so
    each sample is a row, and given a datetime column before being appended
    to the pre 2018 part of the previous record. Elapsed times are printed
    after each stage.

    :return: nothing, '<compound>.txt' files are written to the working
        directory
    """

    start = time.time()
    # import original data set and new datasets
    homedir = r'C:\Users\ARL\Desktop\Jashan\SummitWildfireTracers'
    root = os.path.join(homedir, 'Data')
    nmhcPrev = loadExcel(os.path.join(root, 'NMHC.xlsx'))
    nmhc2018 = loadExcel(r'C:\Users\ARL\Desktop\Ambient_2018_V2.xlsx')
    nmhc2019 = loadExcel(
        r'C:\Users\ARL\Desktop\Summit_GC_2019\NMHC_results\Ambient_2019.xlsx')

    dropcols = ['Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3']

    def _reshape(sheet):
        """Cut a yearly sheet to its mixing ratio rows and transpose it."""
        # the mixing ratio rows sit at positions 70-93; slicing by position
        # works for each sheet regardless of how many rows the other one has
        sheet = sheet.iloc[70:94]

        # drop unnecesary columns, then rows that are empty apart from the
        # row name in 'Unnamed: 0'
        sheet = sheet.drop(dropcols, axis=1)
        valuecols = [x for x in sheet.columns if x not in ['Unnamed: 0']]
        sheet = sheet.dropna(axis=0, how='all', subset=valuecols)

        # transpose, take the column names from the first row, then drop the
        # first and last rows
        sheet = sheet.T.reset_index()
        sheet.columns = list(sheet.loc[0])
        return sheet.drop([0, len(sheet) - 1], axis=0)

    nmhc2018, nmhc2019 = _reshape(nmhc2018), _reshape(nmhc2019)

    end = time.time()
    print('transposed in ', end - start)

    def _addDatetime(sheet):
        """Add a datetime column and drop the old date columns."""
        # the first four characters of the first sample label give the year
        yearstr = str(sheet['Unnamed: 0'][1])[:4]
        doycol = f'Decimal Day of Year {yearstr}'
        sheet['datetime'] = [decToDatetime(x) for x in sheet[doycol]]
        badcols = ['Day', 'Hour', 'Minute', 'Unnamed: 0', doycol]
        return sheet.drop(badcols, axis=1)

    # create datetime column for each dataframe, remove old date columns
    nmhc2018, nmhc2019 = _addDatetime(nmhc2018), _addDatetime(nmhc2019)

    # same for the past data
    nmhcPrev['datetime'] = [decToDatetime(x) for x in nmhcPrev['DecYear']]
    nmhcPrev = nmhcPrev.drop(['DecYear', 'DOY', 'Ignore'], axis=1)

    end = time.time()
    print('datetimes created in ', end - start)

    # combine all datasets into one dataframe
    nmhcPrev = nmhcPrev[nmhcPrev['datetime'] < dt.datetime(2018, 1,
                                                           1)]  # remove 2018
    # pd.concat replaces DataFrame.append, which newer pandas no longer has
    nmhcPrev = pd.concat([nmhcPrev, nmhc2018, nmhc2019])

    end = time.time()
    print('datasets combined in ', end - start)

    # create textfiles for each NMHC
    compounds = [
        'ethane', 'ethene', 'propane', 'propene', 'i-butane', 'acetylene',
        'n-butane', 'i-pentane', 'n-pentane', 'hexane', 'Benzene', 'Toluene'
    ]

    for cpd in compounds:
        # datetimes and values of the specific cpd, without the NANs
        final = nmhcPrev[['datetime', cpd]].dropna(axis=0, how='any')

        # remove early values because of gap
        # NOTE(review): the cutoff is 2011-01-01 although the original comment
        # said "pre2012" -- confirm which year is intended
        final = final[final['datetime'] > dt.datetime(2011, 1, 1)]

        final = noaaDateConv(final)  # conv date formats

        final.to_csv(f'{cpd}.txt', header=None, index=None, sep=' ', mode='w+')

        print(f'{cpd} file written')

    print('All Files Done')
def ratioPlot():
    """
    Plot the ethane/methane and acetylene/methane ratio fits and residuals.

    For each ratio a three panel figure (data with fitted function, residuals
    over time, residuals by day of year) is saved to the Figures directory.
    A joint regression plot of the two residual series is then displayed.

    :return: nothing, figures are saved and displayed with plt.show()
    """
    register_matplotlib_converters()

    # import data
    homedir = r'C:\Users\ARL\Desktop\Jashan\SummitWildfireTracers'
    root = os.path.join(homedir, 'Data')
    ethane = readCsv(root + r'\ethaneRatioNoaa.txt')
    ace = readCsv(root + r'\aceRatioNoaa.txt')

    # data trimming, reassign headers, add datetime column
    header = ['decyear', 'value', 'function', 'resid', 'residsmooth']

    for sheet in [ethane, ace]:
        sheet.columns = header

    # copy after filtering so the columns added in the loop below land on
    # independent frames that are still available for the merge afterwards
    ethane = ethane[ethane['value'] >= 0.0000001].copy()
    ace = ace[ace['value'] >= 0.00000001].copy()

    for name, sheet in (('Ethane', ethane), ('Acetylene', ace)):
        sheet['datetime'] = decToDatetime(sheet['decyear'].values)
        sheet['DOY'] = [x.timetuple().tm_yday for x in sheet['datetime']]

        # axis limits shared between the panels
        datelim = ((min(sheet['datetime']) - dt.timedelta(days=10)),
                   (max(sheet['datetime']) + dt.timedelta(days=10)))
        residlim = (np.mean(sheet['resid']) - np.std(sheet['resid']) * 8,
                    np.mean(sheet['resid']) + np.std(sheet['resid']) * 8)

        # plotting
        sns.set()
        f, ax = plt.subplots(nrows=3, figsize=(12, 8))
        sns.despine(f)
        plt.subplots_adjust(left=None,
                            bottom=None,
                            right=None,
                            top=None,
                            wspace=None,
                            hspace=0.8)

        # original data with fitted function
        ax1 = sns.scatterplot(x='datetime',
                              y='value',
                              data=sheet,
                              alpha=0.7,
                              label='Original Data',
                              ax=ax[0])
        ax2 = sns.lineplot(x='datetime',
                           y='function',
                           data=sheet,
                           linewidth=2,
                           label='Fitted Function',
                           ax=ax[0])
        ax1.set_title(name + ' / Methane Ratio', size=26)
        ax1.set_xlabel('Datetime', fontsize=22)
        ax1.set_ylabel('Ratio Value', fontsize=18)
        ax1.set(xlim=datelim)
        ax1.set(ylim=(min(sheet['value']) - np.mean(sheet['value'] / 3),
                      max(sheet['value']) + np.mean(sheet['value'] / 3)))
        ax2.get_lines()[0].set_color('purple')
        ax1.legend(prop={'size': 14})

        # residuals over time
        ax3 = sns.scatterplot(x='datetime',
                              y='resid',
                              data=sheet,
                              alpha=0.7,
                              label='Residuals',
                              ax=ax[1])
        ax4 = sns.lineplot(x='datetime',
                           y='residsmooth',
                           data=sheet,
                           linewidth=2,
                           label='Smoothed Residual Fit',
                           ax=ax[1])
        ax4.get_lines()[0].set_color('purple')
        ax3.set_title('Residuals in ' + name, size=26)
        ax3.set_xlabel('Datetime', fontsize=22)
        ax3.set_ylabel('Residual / Value', fontsize=18)
        ax3.set(xlim=datelim)
        ax3.set(ylim=residlim)
        ax3.legend(prop={'size': 14})

        # day of year plot residuals
        ax5 = sns.scatterplot(x='DOY',
                              y='resid',
                              data=sheet,
                              alpha=0.7,
                              label='Residuals',
                              ax=ax[2])
        ax5.set_title('Residuals by Julian Day', size=26)
        ax5.set_xlabel('Day of Year', fontsize=22)
        ax5.set_ylabel('Residual / Value', fontsize=18)
        ax5.set(xlim=((min(sheet['DOY'])), (max(sheet['DOY']))))
        ax5.set(ylim=residlim)
        ax5.legend(prop={'size': 14})

        # set the tick label size before saving so the saved figure has it
        for panel in [ax1, ax2, ax3, ax4, ax5]:
            panel.tick_params(labelsize=18)

        direc = os.path.join(homedir,
                             'Figures') + '\\' + name + 'Ratio.png'
        f.savefig(direc, format='png')

    matplotlib.rc("legend", fontsize=26)

    # plotting separate heatmap
    sns.set(style="white", font_scale=1.5)
    sns.despine()
    combo = pd.merge_asof(ethane, ace, on='datetime', direction='nearest')
    combo.dropna(axis=0, inplace=True, how='any')
    # NOTE(review): hard-coded row positions are removed here; the reason is
    # not visible in this file
    combo.drop(combo.index[5586:5776], axis=0, inplace=True)

    x = np.array(combo['resid_x']).reshape((-1, 1))
    y = np.array(combo['resid_y'])

    model = LinearRegression().fit(x, y)  # create liner regression fit
    rSquared = model.score(x, y)  # assign coeff of determination
    slope = model.coef_  # assign slope

    # x and y are passed by keyword; newer seaborn rejects them positionally
    g = sns.jointplot(x=combo['resid_x'],
                      y=combo['resid_y'],
                      kind='reg',
                      color='#e65c00',
                      line_kws={
                          'label':
                          'rSquared: {:1.5f}\n Slope: {:1.5f}\n'.format(
                              rSquared, slope[0])
                      })
    g.set_axis_labels('Ethane/Methane Ratio',
                      'Acetylene/Methane Ratio',
                      fontsize=20)
    plt.tick_params(axis='both', labelsize=18)
    g.fig.suptitle('Correlation between Ethane and Acetylene Ratio Residuals',
                   fontsize=28)
    g.ax_joint.get_lines()[0].set_color('blue')
    plt.legend()
    plt.show()
Example #10
0
def ratioPlot():
    """
    Plot the ethane/methane and acetylene/methane ratio fits together with
    their normalized residuals.

    The residual and smoothed residual columns are divided by the ratio value
    before plotting. For each ratio a three panel figure (data with fitted
    function, normalized residuals over time, normalized residuals by day of
    year) is saved to the Figures directory. A joint regression plot of the
    two normalized residual series is then displayed.

    :return: nothing, figures are saved and displayed with plt.show()
    """
    register_matplotlib_converters()

    # import data
    root = r'C:\Users\ARL\Desktop\J_Summit\analyses\Data'
    ethane = readCsv(root + r'\ethaneRatioNoaa.txt')
    ace = readCsv(root + r'\aceRatioNoaa.txt')

    # data triming, reassign headers, add datetime column
    header = ['decyear', 'value', 'function', 'resid', 'residsmooth']

    for sheet in [ethane, ace]:
        sheet.columns = header

    # copy after filtering so the in-place edits in the loop below land on
    # independent frames that are still available for the merge afterwards
    ethane = ethane[ethane['value'] >= 0.0000001].copy()
    ace = ace[ace['value'] >= 0.00000001].copy()

    for name, sheet in (('Ethane', ethane), ('Acetylene', ace)):

        sheet['datetime'] = decToDatetime(sheet['decyear'].values)

        # replace the residual columns with their normalized versions
        normResid = sheet['resid'].values / sheet['value'].values
        normSmooth = sheet['residsmooth'].values / sheet['value'].values

        sheet.drop(['resid', 'residsmooth'], axis=1, inplace=True)
        sheet['resid'] = normResid
        sheet['residsmooth'] = normSmooth

        sheet['DOY'] = [x.timetuple().tm_yday for x in sheet['datetime']]

        # axis limits shared between the panels
        datelim = ((min(sheet['datetime']) - dt.timedelta(days=10)),
                   (max(sheet['datetime']) + dt.timedelta(days=10)))
        residlim = (np.mean(sheet['resid']) - np.std(sheet['resid']) * 8,
                    np.mean(sheet['resid']) + np.std(sheet['resid']) * 8)

        # plotting
        sns.set()
        f, ax = plt.subplots(nrows=3, figsize=(12, 8))
        sns.despine(f)
        plt.subplots_adjust(left=None,
                            bottom=None,
                            right=None,
                            top=None,
                            wspace=None,
                            hspace=0.8)

        # original data with fitted function
        ax1 = sns.scatterplot(x='datetime',
                              y='value',
                              data=sheet,
                              alpha=0.7,
                              label='Original Data',
                              ax=ax[0])
        ax2 = sns.lineplot(x='datetime',
                           y='function',
                           data=sheet,
                           linewidth=2,
                           label='Fitted Function',
                           ax=ax[0])
        ax1.set_title(name + ' / Methane Ratio')
        ax1.set_xlabel('Datetime')
        ax1.set_ylabel('Mixing Ratio [ppb]')
        ax1.set(xlim=datelim)
        ax1.set(ylim=(min(sheet['value']) - np.mean(sheet['value'] / 3),
                      max(sheet['value']) + np.mean(sheet['value'] / 3)))
        ax2.get_lines()[0].set_color('purple')
        ax1.legend()

        # normalized residuals over time
        ax3 = sns.scatterplot(x='datetime',
                              y='resid',
                              data=sheet,
                              alpha=0.7,
                              label='Normalized Residuals',
                              ax=ax[1])
        ax4 = sns.lineplot(x='datetime',
                           y='residsmooth',
                           data=sheet,
                           linewidth=2,
                           label='Smoothed Residual Fit',
                           ax=ax[1])
        ax4.get_lines()[0].set_color('purple')
        ax3.set_title('Normalized Residuals in ' + name)
        ax3.set_xlabel('Datetime')
        ax3.set_ylabel('Mixing Ratio [ppb]')
        ax3.set(xlim=datelim)
        ax3.set(ylim=residlim)
        ax3.legend()

        # day of year plot residuals
        ax5 = sns.scatterplot(x='DOY',
                              y='resid',
                              data=sheet,
                              alpha=0.7,
                              label='Normalized Residuals',
                              ax=ax[2])
        ax5.set_title('Normalized Residuals by Julian Day')
        ax5.set_xlabel('Day of Year')
        ax5.set_ylabel('Mixing Ratio [ppb]')
        ax5.set(xlim=((min(sheet['DOY'])), (max(sheet['DOY']))))
        ax5.set(ylim=residlim)
        ax5.legend()

        direc = r'C:\Users\ARL\Desktop\J_Summit\analyses\Figures' + '\\' + name + 'Ratio.png'
        f.savefig(direc, format='png')

    # plotting seperate heatmap
    sns.set(style="white")
    sns.despine()
    combo = pd.merge_asof(ethane, ace, on='datetime', direction='nearest')
    combo = combo[combo['resid_y'] > -5]

    x = np.array(combo['resid_x']).reshape((-1, 1))
    y = np.array(combo['resid_y'])

    model = LinearRegression().fit(x, y)  # create liner regression fit
    rSquared = model.score(x, y)  # assign coeff of determination
    slope = model.coef_  # assign slope

    # x and y are passed by keyword; newer seaborn rejects them positionally
    g = sns.jointplot(x=combo['resid_x'],
                      y=combo['resid_y'],
                      kind='reg',
                      color='#e65c00',
                      line_kws={
                          'label':
                          'rSquared: {:1.5f}\n Slope: {:1.5f}\n'.format(
                              rSquared, slope[0])
                      })
    g.set_axis_labels('Ethane MR [ppb]', 'Acetylene MR [ppb]', fontsize=12)
    g.fig.suptitle(
        'Correlation between Ethane and Acetylene Normalized Residuals')
    g.ax_joint.get_lines()[0].set_color('blue')
    plt.legend()
    plt.show()
def ratios():
    """
    Create the ethane/methane and acetylene/methane ratio files.

    Reads the ethane and acetylene fit files and the methane file from the
    hard-coded data directory, keeps only the decimal-year and value columns,
    removes rows before 2012 and rows containing NaN, pairs every
    ethane/acetylene sample with the nearest methane sample within a
    three-hour window and divides the two values.

    For each of ``ethane`` and ``ace`` the result is passed through
    ``noaaDateConv`` and written to ``<name>Ratio_Aug.txt`` (space separated,
    no header, no index) relative to the current working directory.

    Relies on ``decToDatetime`` and ``noaaDateConv`` defined elsewhere in
    this module. Returns nothing.
    """

    # import data sets
    root = r'C:\Users\ARL\Desktop\Jashan\Summit\analyses\Data'
    fitCols = ['DecYear', 'val', 'func', 'resid']  # column names

    # (name, file, column layout); the methane file carries one extra
    # 'smooth' column. Listing the layout per file avoids guessing the data
    # set from the value of its first cell.
    sources = [
        ('ethane', r'\ethaneFIT.txt', fitCols),
        ('ace', r'\acetyleneFIT.txt', fitCols),
        ('methane', r'\methane.txt', fitCols + ['smooth']),
    ]

    # cleaning up data
    sheets = {}
    for name, fname, cols in sources:
        sheet = pd.read_csv(root + fname,
                            delim_whitespace=True,
                            error_bad_lines=False,
                            header=None)
        sheet.columns = cols  # reassign col names
        sheet = sheet[['DecYear', 'val']]  # drop misc cols
        sheet = sheet[sheet['DecYear'] >= 2012]  # remove pre 2012 vals
        # Store the cleaned frame explicitly: rebinding the loop variable
        # alone would leave the frames used below unfiltered.
        sheets[name] = sheet.dropna(axis=0, how='any')  # remove NaN rows

    # create ratios
    toleranceHours = 3  # tolerance in hours
    # DecYear is a decimal year, so express the window as a fraction of a
    # (365 day) year
    tolerance = (toleranceHours / 24) / 365

    methane = sheets['methane'].sort_values('DecYear')
    for name in ('ethane', 'ace'):
        combinedsheet = pd.merge_asof(sheets[name].sort_values('DecYear'),
                                      methane,
                                      on='DecYear',
                                      tolerance=tolerance,
                                      direction='nearest')
        combinedsheet['datetime'] = decToDatetime(
            combinedsheet['DecYear'].tolist())
        # merge_asof yields DecYear, val_x (compound), val_y (methane)
        combinedsheet.columns = ['DecYear', name, 'methane', 'datetime']

        ratio = combinedsheet[name] / combinedsheet['methane']
        combinedsheet.drop(['DecYear', name, 'methane'],
                           axis=1,
                           inplace=True)
        combinedsheet['ratio'] = ratio
        # samples without a methane match inside the tolerance have a NaN
        # ratio and are discarded here
        combinedsheet.dropna(axis=0, inplace=True, how='any')

        df = noaaDateConv(combinedsheet)
        df.to_csv(f'{name}Ratio_Aug.txt',
                  header=None,
                  index=None,
                  sep=' ',
                  mode='w+')
Example #12
0
header = ['yr', 'value', 'function', 'resid']  # dataframe headers

register_matplotlib_converters()

# For every compound: load its fit file, normalise the residuals by the
# measured value, convert decimal years to datetimes and trim outliers.
for cpd in compounds:
    filename = root + '\\' + cpd + 'FIT.txt'  # path of this compound's fit file
    data = readCsv(filename)
    data.columns = header  # reset column names

    # Keep positive values only. Copy so the column edits below act on an
    # independent frame instead of a slice of the frame that was read in.
    data = data[data['value'] > 0.0].copy()

    # residual normalised by the measured value ('resid' keeps its position)
    normResid = data['resid'].values / data['value'].values
    data['resid'] = normResid

    dates = decToDatetime(data['yr'].values)  # call conv function
    data['datetime'] = dates  # assign to DF
    data.drop('yr', axis=1, inplace=True)

    # trim a few extreme outliers
    values = data['value'].values  # get the value col
    z = np.abs(stats.zscore(values))  # get the z score
    thresh = 2  # drop values more than 2 std devs from the mean
    data = data[~(z > thresh)]  # boolean index

    resids = data['resid'].values  # same thing but trim resid
    z = np.abs(stats.zscore(resids))
    thresh = 5  # drop residuals more than 5 std devs from the mean
    data = data[~(z > thresh)]

    # y bounds
Example #13
0
def windRoseMethane():
    """
    Draw three side-by-side windrose plots of methane concentration.

    The panels show, from left to right, the frame returned by ``metCombo``
    for ``methane2019updated.txt`` (titled GCFID), the Picarro file and the
    flask file. The Picarro and flask records are each matched to the
    nearest met record within one hour before plotting; rows with any NaN
    are discarded. The figure is displayed with ``plt.show()``; nothing is
    returned.

    Relies on ``metTrim``, ``metCombo``, ``decToDatetime`` and
    ``createDatetime`` defined elsewhere in this module.
    """

    # ---- import data
    root = r'C:\Users\ARL\Desktop\J_Summit\analyses\Data'
    met = metTrim()
    arl = metCombo(root + r'\methane2019updated.txt')
    arl.dropna(axis=0, how='any', inplace=True)
    pic = pd.read_csv(root + r'\picarro_ch4.txt',
                      delim_whitespace=True,
                      encoding='utf8',
                      error_bad_lines=False,
                      header=None)
    flask = pd.read_csv(root + r'\flask_ch4.txt',
                        delim_whitespace=True,
                        error_bad_lines=False,
                        header=None)

    # The same met frame serves both merges below (merge_asof does not
    # modify its inputs), so it is loaded and trimmed only once.
    met.drop(['steady'], axis=1, inplace=True)
    window = pd.Timedelta('1 hour')

    # ---- combining datasets (met and picarro)
    pic.columns = ['date', 'value']
    pic['datetime'] = decToDatetime(pic['date'].values)
    pic.drop('date', axis=1, inplace=True)

    # merge the met data onto the concentration data by finding the nearest
    # datetime within an hour
    pic.dropna(axis=0, how='any', inplace=True)
    picarro = pd.merge_asof(pic.sort_values('datetime'),
                            met,
                            on='datetime',
                            direction='nearest',
                            tolerance=window)
    picarro.dropna(axis=0, how='any', inplace=True)

    # ---- combining datasets (flask and met)
    flask.columns = ['yr', 'mo', 'dy', 'hr', 'val']
    flask['datetime'] = createDatetime(flask['yr'], flask['mo'], flask['dy'],
                                       flask['hr'])
    flask.drop(['yr', 'mo', 'dy', 'hr'], axis=1, inplace=True)

    # merge_asof needs a sorted, null-free key on the left side, so treat the
    # flask data like the picarro data; rows with NaN would be discarded
    # after the merge anyway
    flask.dropna(axis=0, how='any', inplace=True)
    flaskMet = pd.merge_asof(flask.sort_values('datetime'),
                             met,
                             on='datetime',
                             direction='nearest',
                             tolerance=window)
    flaskMet.dropna(axis=0, how='any', inplace=True)

    # ---- plotting
    fig, (ax1, ax2, ax3) = plt.subplots(1,
                                        3,
                                        subplot_kw=dict(projection='windrose'))
    fig.suptitle('Methane Conc. at Summit by Wind Direction', fontsize=16)
    plt.subplots_adjust(left=None,
                        bottom=None,
                        right=None,
                        top=None,
                        wspace=0.2,
                        hspace=-0.3)

    # (axis, frame, concentration column, title) for the GC, picarro and
    # flask windroses; all three share the same bar and legend settings
    panels = [
        (ax1, arl, 'val', 'GCFID Methane Conc. [ppb]\n'),
        (ax2, picarro, 'value', 'Picarro Methane Conc. [ppb]\n'),
        (ax3, flaskMet, 'val', 'Flask Methane Conc. [ppb]\n'),
    ]
    for axis, frame, valueCol, title in panels:
        axis.bar(frame['dir'].values,
                 frame[valueCol].values,
                 normed=False,
                 opening=0.9,
                 edgecolor='black',
                 nsector=24,
                 bins=14,
                 cmap=cm.viridis_r,
                 blowto=False)
        axis.set_title(title)
        axis.set_legend(loc=8,
                        fancybox=True,
                        shadow=True,
                        bbox_to_anchor=(0.5, -1.05))

    plt.show()