import datetime as dt
import os
import time

import numpy as np
import pandas as pd
from scipy import stats

# Project helpers (loadExcel, decToDatetime, noaaDateConv, ratioCreator) are
# assumed to be defined or imported elsewhere in this project.


def ratios():
    """Create the ethane/methane and acetylene/methane ratio text files using the
    ratioCreator helper (a revised merge_asof-based version appears later in this file)."""
    # import data sets
    root = r'C:\Users\ARL\Desktop\J_Summit\analyses\Data'
    ethane = pd.read_csv(root + r'\ethaneFIT.txt', delim_whitespace=True,
                         error_bad_lines=False, header=None)
    ace = pd.read_csv(root + r'\acetyleneFIT.txt', delim_whitespace=True,
                      error_bad_lines=False, header=None)
    methane = pd.read_csv(root + r'\methane.txt', delim_whitespace=True,
                          error_bad_lines=False, header=None)

    cols = ['DecYear', 'val', 'func', 'resid']  # column names

    # cleaning up data
    for sheet in [ethane, ace, methane]:
        # reassign col names; the methane sheet carries an extra smoothed column
        if sheet.iloc[0][0] == ethane.iloc[0][0] or sheet.iloc[0][0] == ace.iloc[0][0]:
            sheet.columns = cols
            sheet.drop(['func', 'resid'], axis=1, inplace=True)  # drop misc cols
        else:
            sheet.columns = cols + ['smooth']
            sheet.drop(['func', 'resid', 'smooth'], axis=1, inplace=True)

        sheet.drop(sheet[sheet['DecYear'] < 2012].index, inplace=True)  # remove pre-2012 vals
        sheet.dropna(axis=0, how='any', inplace=True)  # remove NaN rows

    # create ratios
    tolerance = 3  # tolerance in hours
    ethane.name = 'ethane'
    ace.name = 'ace'

    for sheet in [ethane, ace]:
        ratiosheet, datesheet = ratioCreator(tolerance, sheet, methane)
        datesheet = decToDatetime(datesheet)

        df = pd.DataFrame(columns=['datetime', 'val'])
        df['datetime'], df['val'] = datesheet, ratiosheet
        df = noaaDateConv(df)

        df.to_csv(f'{sheet.name}Ratio.txt', header=None, index=None, sep=' ', mode='w+')

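# Note: decToDatetime is a project helper used throughout this file and is assumed
# to convert decimal-year values (e.g. 2018.5) into datetime objects; it is called
# with both single values and sequences above. A minimal, hypothetical sketch of the
# single-value conversion is given below for reference only; the project's actual
# implementation may differ.
def decToDatetimeSketch(decyear):
    year = int(decyear)                                             # integer year
    start = dt.datetime(year, 1, 1)                                 # start of that year
    seconds_in_year = (dt.datetime(year + 1, 1, 1) - start).total_seconds()
    return start + dt.timedelta(seconds=(decyear - year) * seconds_in_year)
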
def methane():
    """Combine the archived methane record with the 2018 and 2019 in-situ results."""
    # import original dataset and new datasets
    methanePrev = loadExcel(r"C:\Users\ARL\Desktop\Jashan\Summit\analyses\Data\Methane.xlsx")
    methane2018 = loadExcel(r'C:\Users\ARL\Desktop\SUM_CH4_insitu_2018.xlsx')
    methane2019 = loadExcel(r'C:\Users\ARL\Desktop\Summit_GC_2019\CH4_results\SUM_CH4_insitu_2019.xlsx')

    # identify column names we want to keep
    goodcol = ['Decimal Year', 'Run median']  # good columns
    badcol = [x for x in methane2018.columns if x not in goodcol]  # bad columns
    newnames = ['DecYear', 'MR']

    for sheet in [methane2018, methane2019]:
        sheet.drop(badcol, axis=1, inplace=True)  # drop bad columns
        sheet.dropna(how='any', axis=0, inplace=True)  # drop NaN rows
        sheet.columns = newnames  # assign same col names

    # keep only pre-2018 values from the old record; 2018 onward comes from the new sheets
    methanePrev = methanePrev[methanePrev['DecYear'] < 2018]

    comb = [methanePrev, methane2018, methane2019]  # create combination frame
    methaneFinal = pd.concat(comb)  # concat

    # trim extreme outliers with a z-score threshold
    values = methaneFinal['MR'].values
    z = np.abs(stats.zscore(values))
    thresh = 5
    methaneFinal = methaneFinal[~(z > thresh)]

    dates = decToDatetime(methaneFinal['DecYear'].values)  # conv to datetime
    methaneFinal['datetime'] = dates  # add to dataframe

    noaaMethane = pd.DataFrame(columns=['datetime', 'MR'])
    noaaMethane['datetime'], noaaMethane['MR'] = dates, methaneFinal['MR'].values  # noaa version
    noaaMethane = noaaDateConv(noaaMethane)

    noaaMethane.to_csv('methane2019updated.txt', header=None, index=None, sep=' ', mode='w+')

    return methaneFinal

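# Note: loadExcel is a project helper assumed to be a thin wrapper around
# pandas.read_excel; a hypothetical sketch is shown for context. The real helper
# may handle sheet selection, engines, or header rows differently.
def loadExcelSketch(path):
    return pd.read_excel(path)
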
def nmhc():
    """Combine the archived NMHC record with the 2018 and 2019 ambient results and
    write one whitespace-delimited text file per compound."""
    start = time.time()

    # import original data set and new datasets
    homedir = r'C:\Users\ARL\Desktop\Jashan\SummitWildfireTracers'
    root = os.path.join(homedir, 'Data')
    nmhcPrev = loadExcel(os.path.join(root, 'NMHC.xlsx'))
    nmhc2018 = loadExcel(r'C:\Users\ARL\Desktop\Ambient_2018_V2.xlsx')
    nmhc2019 = loadExcel(r'C:\Users\ARL\Desktop\Summit_GC_2019\NMHC_results\Ambient_2019.xlsx')

    # identify the mixing ratio rows
    allrows = list(range(0, len(nmhc2018.index)))
    rowstokeep = list(range(70, 94))
    rowstodrop = [x for x in allrows if x not in rowstokeep]

    # drop rows from nmhc2018 and nmhc2019
    nmhc2018 = nmhc2018.drop(rowstodrop, axis=0)
    nmhc2019 = nmhc2019.drop(rowstodrop, axis=0)

    # drop unnecessary columns, then rows that are entirely NaN
    dropcols = ['Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3']
    nmhc2018, nmhc2019 = nmhc2018.drop(dropcols, axis=1), nmhc2019.drop(dropcols, axis=1)
    nmhc2018 = nmhc2018.dropna(axis=0, how='all',
                               subset=[x for x in nmhc2018.columns if x not in ['Unnamed: 0']])
    nmhc2019 = nmhc2019.dropna(axis=0, how='all',
                               subset=[x for x in nmhc2019.columns if x not in ['Unnamed: 0']])

    # transpose, reset columns, drop first row and last row
    nmhc2018, nmhc2019 = nmhc2018.T.reset_index(), nmhc2019.T.reset_index()
    nmhc2018.columns, nmhc2019.columns = list(nmhc2018.loc[0]), list(nmhc2019.loc[0])
    nmhc2018 = nmhc2018.drop([0, len(nmhc2018) - 1], axis=0)
    nmhc2019 = nmhc2019.drop([0, len(nmhc2019) - 1], axis=0)

    end = time.time()
    print('transposed in ', end - start)

    # create datetime column for each new dataframe
    for yr in [nmhc2018, nmhc2019]:
        datetime = []
        sampledate = yr['Unnamed: 0'][1]
        yearstr = str(sampledate)[:4]  # the year of this sheet as a string

        for x in yr[f'Decimal Day of Year {yearstr}']:
            datetime.append(decToDatetime(x))  # call decyear conv
        yr['datetime'] = datetime

    # create datetime column for past data
    datetime = []
    for x in nmhcPrev['DecYear']:
        datetime.append(decToDatetime(x))
    nmhcPrev['datetime'] = datetime

    # remove old unneeded date columns
    for yr in [nmhc2018, nmhc2019]:
        sampledate = yr['Unnamed: 0'][1]
        yearstr = str(sampledate)[:4]
        badcols = ['Day', 'Hour', 'Minute', 'Unnamed: 0', f'Decimal Day of Year {yearstr}']
        yr.drop(badcols, axis=1, inplace=True)

    badcols = ['DecYear', 'DOY', 'Ignore']
    nmhcPrev.drop(badcols, axis=1, inplace=True)

    end = time.time()
    print('datetimes created in ', end - start)

    # combine all datasets into one dataframe
    nmhcPrev = nmhcPrev[nmhcPrev['datetime'] < dt.datetime(2018, 1, 1)]  # remove 2018
    nmhcPrev = nmhcPrev.append(nmhc2018)  # add all 2018
    nmhcPrev = nmhcPrev.append(nmhc2019)  # add all 2019

    end = time.time()
    print('datasets combined in ', end - start)

    # create textfiles for each NMHC
    compounds = ['ethane', 'ethene', 'propane', 'propene', 'i-butane', 'acetylene',
                 'n-butane', 'i-pentane', 'n-pentane', 'hexane', 'Benzene', 'Toluene']

    for cpd in compounds:
        values = nmhcPrev[cpd]  # get the specific cpd
        dates = nmhcPrev['datetime']  # get the specific datetimes
        final = pd.concat([dates, values], axis=1)
        final = final.dropna(axis=0, how='any')  # drop the NaNs
        final = final[final['datetime'] > dt.datetime(2011, 1, 1)]  # remove early values because of a data gap (cutoff Jan 1, 2011)
        final = noaaDateConv(final)  # conv date formats

        final.to_csv(f'{cpd}.txt', header=None, index=None, sep=' ', mode='w+')
        print(f'{cpd} file written')

    print('All Files Done')

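# For reference: the per-compound text files written by nmhc() are whitespace-
# delimited with no header row, matching how ratios() reads its FIT inputs. A
# minimal, hypothetical read-back (the file name and working directory are
# assumptions, not part of the project code):
def readCompoundFileSketch(path='ethane.txt'):
    return pd.read_csv(path, delim_whitespace=True, header=None)
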
def ratios():
    """This function creates the ethane/methane and acetylene/methane ratios."""
    # import data sets
    root = r'C:\Users\ARL\Desktop\Jashan\Summit\analyses\Data'
    ethane = pd.read_csv(root + r'\ethaneFIT.txt', delim_whitespace=True,
                         error_bad_lines=False, header=None)
    ace = pd.read_csv(root + r'\acetyleneFIT.txt', delim_whitespace=True,
                      error_bad_lines=False, header=None)
    methane = pd.read_csv(root + r'\methane.txt', delim_whitespace=True,
                          error_bad_lines=False, header=None)

    cols = ['DecYear', 'val', 'func', 'resid']  # column names

    # cleaning up data
    for sheet in [ethane, ace, methane]:
        # reassign col names; the methane sheet carries an extra smoothed column
        if sheet.iloc[0][0] == ethane.iloc[0][0] or sheet.iloc[0][0] == ace.iloc[0][0]:
            sheet.columns = cols
            sheet.drop(['func', 'resid'], axis=1, inplace=True)  # drop misc cols
        else:
            sheet.columns = cols + ['smooth']
            sheet.drop(['func', 'resid', 'smooth'], axis=1, inplace=True)

        sheet.drop(sheet[sheet['DecYear'] < 2012].index, inplace=True)  # remove pre-2012 vals
        sheet.dropna(axis=0, how='any', inplace=True)  # remove NaN rows

    # create ratios
    tolerance = 3  # tolerance in hours
    tolerance = (tolerance / 24) / 365  # convert from hours to decimal years (the units of DecYear)

    ethane.name = 'ethane'
    ace.name = 'ace'

    for sheet in [ethane, ace]:
        # match each NMHC value with the nearest methane value inside the tolerance
        combinedsheet = pd.merge_asof(sheet.sort_values('DecYear'), methane.sort_values('DecYear'),
                                      on='DecYear', tolerance=tolerance, direction='nearest')

        datesheet = decToDatetime(combinedsheet['DecYear'].tolist())
        combinedsheet['datetime'] = datesheet
        combinedsheet.columns = ['DecYear', f'{sheet.name}', 'methane', 'datetime']

        ratio = combinedsheet[f'{sheet.name}'] / combinedsheet['methane']
        combinedsheet.drop(['DecYear', f'{sheet.name}', 'methane'], axis=1, inplace=True)
        combinedsheet['ratio'] = ratio
        combinedsheet.dropna(axis=0, inplace=True, how='any')

        df = noaaDateConv(combinedsheet)
        df.to_csv(f'{sheet.name}Ratio_Aug.txt', header=None, index=None, sep=' ', mode='w+')

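# For reference, a self-contained toy example of the pd.merge_asof pattern used in
# ratios(): nearest-key matching within a tolerance. All numbers below are made up
# purely for illustration.
def mergeAsofDemo():
    nmhc_toy = pd.DataFrame({'DecYear': [2018.10, 2018.20, 2018.30], 'val': [1.0, 2.0, 3.0]})
    ch4_toy = pd.DataFrame({'DecYear': [2018.101, 2018.35], 'val': [10.0, 30.0]})
    merged = pd.merge_asof(nmhc_toy.sort_values('DecYear'), ch4_toy.sort_values('DecYear'),
                           on='DecYear', tolerance=0.01, direction='nearest')
    # rows with no match inside the tolerance come back as NaN (here the 2nd and 3rd rows)
    return merged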