Beispiel #1
0
def methane():
    """Combine the pre-2018 methane record with the 2018 and 2019 in-situ sheets.

    Loads three Excel workbooks from hard-coded paths, reduces the 2018/2019
    sheets to the ``DecYear``/``MR`` columns, concatenates everything, trims
    rows whose ``MR`` z-score exceeds 5, adds a ``datetime`` column and writes
    the output of ``noaaDateConv`` to ``methane2019updated.txt`` (space
    separated, no header, no index).

    Returns:
        The combined, outlier-trimmed DataFrame including the ``datetime``
        column.
    """

    # import original dataset and new datasets
    methanePrev = loadExcel(
        r"C:\Users\ARL\Desktop\Jashan\Summit\analyses\Data\Methane.xlsx")
    methane2018 = loadExcel(r'C:\Users\ARL\Desktop\SUM_CH4_insitu_2018.xlsx')
    methane2019 = loadExcel(
        r'C:\Users\ARL\Desktop\Summit_GC_2019\CH4_results\SUM_CH4_insitu_2019.xlsx'
    )

    # Select and rename the wanted columns *by name* for each sheet. Deriving
    # the drop list from the 2018 sheet alone and renaming by position breaks
    # (or silently swaps the columns) as soon as the 2019 layout differs.
    renames = {'Decimal Year': 'DecYear', 'Run median': 'MR'}
    yearly = []
    for sheet in (methane2018, methane2019):
        sheet = sheet[list(renames)].rename(columns=renames)
        yearly.append(sheet.dropna(how='any', axis=0))  # drop NaN rows

    # keep only the pre-2018 part of the old record; 2018 onwards comes from
    # the yearly sheets
    methanePrev = methanePrev[methanePrev['DecYear'] < 2018]

    methaneFinal = pd.concat([methanePrev] + yearly)

    # A single NaN in MR makes every z-score NaN, which would disable the
    # outlier trim below, so incomplete rows are removed first.
    methaneFinal = methaneFinal.dropna(subset=['DecYear', 'MR'])

    # trim extreme outliers
    z = np.abs(stats.zscore(methaneFinal['MR'].values))
    thresh = 5
    # copy so the column assignment below does not target a slice
    methaneFinal = methaneFinal[~(z > thresh)].copy()

    dates = decToDatetime(methaneFinal['DecYear'].values)  # conv to datetime
    methaneFinal['datetime'] = dates  # add to dataframe

    noaaMethane = pd.DataFrame(columns=['datetime', 'MR'])
    noaaMethane['datetime'], noaaMethane['MR'] = dates, methaneFinal[
        'MR'].values  # noaa version
    noaaMethane = noaaDateConv(noaaMethane)

    noaaMethane.to_csv('methane2019updated.txt',
                       header=None,
                       index=None,
                       sep=' ',
                       mode='w+')

    return methaneFinal
Beispiel #2
0
def ethaneAce():
    """Export the post-2012 ethane and acetylene columns of NMHC.xlsx.

    Writes ``ethaneOriginal.txt`` and ``aceOriginal.txt`` to the current
    working directory. Each line holds ``DecYear`` and the compound value,
    both formatted with ``%f`` and separated by one space.
    """
    voc = loadExcel(r"C:\Users\ARL\Desktop\Python Code\Data\NMHC.xlsx")

    # rows after 2012 only, the three needed columns, no incomplete rows
    recent = voc[voc['DecYear'] > 2012]
    table = recent[['DecYear', 'ethane', 'acetylene']].dropna(axis=0,
                                                              how='any')

    # one output file per compound, written in this order
    outputs = (('ethaneOriginal.txt', 'ethane'),
               ('aceOriginal.txt', 'acetylene'))
    for filename, compound in outputs:
        with open(filename, 'w+') as f:
            for decyear, amount in zip(table['DecYear'], table[compound]):
                f.write('%f ' % decyear)
                f.write('%f\n' % amount)
def ethaneAce():
    """Write the post-2012 ethane and acetylene series to two text files.

    Reads ``NMHC.xlsx`` from the project's ``Data`` folder, keeps rows with
    ``DecYear`` above 2012 and no missing values in the three columns used,
    then writes ``ethaneOriginal.txt`` and ``aceOriginal.txt`` as
    ``"<DecYear> <value>"`` lines (``%f`` formatting) in the working directory.
    """
    homedir = r'C:\Users\ARL\Desktop\Jashan\SummitWildfireTracers'
    workbook = os.path.join(os.path.join(homedir, 'Data'), 'NMHC.xlsx')
    frame = loadExcel(workbook)

    # filter rows and pick the columns in one step, then drop incomplete rows
    wanted = ['DecYear', 'ethane', 'acetylene']
    frame = frame.loc[frame['DecYear'] > 2012, wanted]
    frame = frame.dropna(axis=0, how='any')

    def dump(path, column):
        # one "<DecYear> <value>" line per remaining row
        with open(path, 'w+') as f:
            f.writelines('%f %f\n' % pair
                         for pair in zip(frame['DecYear'], frame[column]))

    dump('ethaneOriginal.txt', 'ethane')
    dump('aceOriginal.txt', 'acetylene')
Beispiel #4
0
[note] This was one of the first real scripts I made for the project. It is a poor coding example, but it shows how
much I've progressed since first learning Python.
"""

# Import Libraries
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from fileLoading import loadExcel
import os

# import data
homedir = r'C:\Users\ARL\Desktop\Jashan\SummitWildfireTracers'
root = os.path.join(homedir, 'Data')

nmhcData = loadExcel(os.path.join(root, 'NMHC.XLSX'))
methaneData = loadExcel(os.path.join(root, 'Methane.XLSX'))

# Plotting NMHC
date = nmhcData.loc[:, 'DecYear']  # the 'DecYear' column of the NMHC sheet
numCompounds = np.linspace(0, 11,
                           num=12)  # There are 12 compounds we want to plot
compounds = list(nmhcData.columns)[3:15]  # List of the compound names
numYears = np.linspace(2008, 2018,
                       num=((2018 - 2008) + 1))  # number of years total

for i in numCompounds:
    # Open exactly one figure per compound. Calling plt.figure(i) and then
    # figure(num=None, ...) created two figures per iteration: the labels
    # landed on the second one, and the next iteration's plt.figure(i)
    # re-activated it. linspace yields floats, hence the int() for the
    # figure number.
    plt.figure(int(i), figsize=(8, 6), dpi=160, facecolor='w', edgecolor='k')
    plt.xlabel('Day of Year', fontsize=14)  # x labels all same
Beispiel #5
0
import pandas as pd
from fileLoading import loadExcel, readCsv
import os
from dateConv import visitToDatetime, createDatetime
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
from pandas.plotting import register_matplotlib_converters
import matplotlib.dates as mdates  # For formatting date

register_matplotlib_converters()

# locate and load the visit log and the ethane record
root = r'C:\Users\ARL\Desktop\Jashan\Summit\analyses\Data'
datapath = os.path.join(root, 'TAWO_visit_log.xlsx')
concpath = os.path.join(root, 'ethane.txt')
visits = loadExcel(datapath)
ethane = readCsv(concpath)

# data cleaning
# Keep the 'Date' values (minus the first entry) before the column is removed.
# NOTE(review): `dates` is taken before the all-NaN rows are dropped, so it may
# end up longer than `visits` - confirm against the code that consumes it.
dates = visits['Date'].values[1:]
badcols = ['Initials', 'Unnamed: 5', 'Date']
visits = (
    visits.drop(badcols, axis=1)  # unused columns
    .drop([0], axis=0)  # first row
    .dropna(axis=0, how='all')  # rows with no values left
    .reset_index(drop=True)
)

# positional column names for the ethane file
ethane.columns = ['yr', 'mo', 'dy', 'hr', 'na', 'val']

# create proper datetimes
Beispiel #6
0
"""
This script makes a few modifications to the Methane data from 2012-2018. Eventually it will also import new 2019
data from the spreadsheet. Created on May 29th, 2019
"""

# Import libraries
from fileLoading import loadExcel
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

root = r'C:\Users\ARL\Desktop\J_Summit\analyses\HarmonicFit\textFiles'
methaneData = loadExcel(r"C:\Users\ARL\Desktop\Python Code\Data\Methane.xlsx")
# any row containing a NaN is discarded as a whole
methaneData = methaneData.dropna(axis=0, how='any')

# Remove extreme outliers: collect the index labels of rows whose MR lies
# above 2100 or below 1730, then drop those labels
flag1 = methaneData.loc[methaneData['MR'] > 2100].index
flag2 = methaneData.loc[methaneData['MR'] < 1730].index
methaneData = methaneData.drop(flag1).drop(flag2)

# column-wise maxima and minima, separated by a dashed line
print(methaneData.max(), '-' * 10, methaneData.min(), sep='\n')

# one "<DecYear> <MR>" line per remaining row
outpath = root + r"\methaneARL_nofit.txt"
with open(outpath, 'w+') as f:
    for decyear, mr in zip(methaneData['DecYear'], methaneData['MR']):
        f.write("%f %f\n" % (decyear, mr))
def _mixing_ratio_table(sheet):
    """Reshape one yearly ambient sheet into one row per sample column."""
    # Keep only the rows labelled 70-93 (the mixing ratio rows). Selecting by
    # membership in the sheet's own index works whatever the sheet's length,
    # unlike a drop list built from another sheet.
    sheet = sheet[sheet.index.isin(list(range(70, 94)))]

    # drop unnecessary columns, then rows that are empty everywhere except in
    # the 'Unnamed: 0' column
    sheet = sheet.drop(['Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3'], axis=1)
    sheet = sheet.dropna(
        axis=0,
        how='all',
        subset=[x for x in sheet.columns if x != 'Unnamed: 0'])

    # transpose, use the first transposed row as header, then drop that row
    # and the last row
    sheet = sheet.T.reset_index()
    sheet.columns = list(sheet.loc[0])
    return sheet.drop([0, len(sheet) - 1], axis=0)


def nmhc():
    """Merge the historical NMHC record with the 2018 and 2019 ambient sheets.

    Loads the three workbooks from hard-coded paths, reshapes the yearly
    sheets to one row per sample, builds a ``datetime`` column for every
    dataset via ``decToDatetime``, combines them (old record before
    2018-01-01, then all of 2018 and 2019) and writes one ``<compound>.txt``
    file per compound (space separated, no header, no index) after
    ``noaaDateConv``. Progress and cumulative timings are printed.

    Returns:
        None.
    """

    start = time.time()
    # import original data set and new datasets
    homedir = r'C:\Users\ARL\Desktop\Jashan\SummitWildfireTracers'
    root = os.path.join(homedir, 'Data')
    nmhcPrev = loadExcel(os.path.join(root, 'NMHC.xlsx'))
    nmhc2018 = loadExcel(r'C:\Users\ARL\Desktop\Ambient_2018_V2.xlsx')
    nmhc2019 = loadExcel(
        r'C:\Users\ARL\Desktop\Summit_GC_2019\NMHC_results\Ambient_2019.xlsx')

    # isolate the mixing ratio rows and reshape each yearly sheet
    nmhc2018 = _mixing_ratio_table(nmhc2018)
    nmhc2019 = _mixing_ratio_table(nmhc2019)

    end = time.time()
    print('transposed in ', end - start)

    # create the datetime column for each yearly frame and remove the old
    # date columns
    for yr in [nmhc2018, nmhc2019]:
        # the first four characters of the entry labelled 1 give the year,
        # which is part of the decimal-day column name
        yearstr = str(yr['Unnamed: 0'][1])[:4]
        doycol = f'Decimal Day of Year {yearstr}'

        yr['datetime'] = [decToDatetime(x) for x in yr[doycol]]
        yr.drop(['Day', 'Hour', 'Minute', 'Unnamed: 0', doycol],
                axis=1,
                inplace=True)

    # same for the past data
    nmhcPrev['datetime'] = [decToDatetime(x) for x in nmhcPrev['DecYear']]
    nmhcPrev.drop(['DecYear', 'DOY', 'Ignore'], axis=1, inplace=True)

    end = time.time()
    print('datetimes created in ', end - start)

    # combine all datasets into one dataframe; 2018 onwards comes from the
    # yearly sheets. DataFrame.append was removed in pandas 2.0, pd.concat is
    # the equivalent.
    nmhcPrev = nmhcPrev[nmhcPrev['datetime'] < dt.datetime(2018, 1, 1)]
    nmhcPrev = pd.concat([nmhcPrev, nmhc2018, nmhc2019], sort=False)

    end = time.time()
    print('datasets combined in ', end - start)

    # create textfiles for each NMHC
    compounds = [
        'ethane', 'ethene', 'propane', 'propene', 'i-butane', 'acetylene',
        'n-butane', 'i-pentane', 'n-pentane', 'hexane', 'Benzene', 'Toluene'
    ]

    # NOTE(review): the original comment said "remove pre2012 values because
    # of gap" but the cutoff is 2011-01-01 - confirm which one is intended.
    cutoff = dt.datetime(2011, 1, 1)

    for cpd in compounds:
        final = nmhcPrev[['datetime', cpd]]  # datetimes + the specific cpd
        final = final.dropna(axis=0, how='any')  # drop the NANs
        final = final[final['datetime'] > cutoff]

        final = noaaDateConv(final)  # conv date formats

        final.to_csv(f'{cpd}.txt', header=None, index=None, sep=' ', mode='w+')

        print(f'{cpd} file written')

    print('All Files Done')