def picarroMetCombo(filename):
    met = metTrim()
    sheet = pd.read_csv(filename, encoding='utf8', header=None, delim_whitespace=True)
    sheet.columns = ['date', 'value']
    sheet['datetime'] = decToDatetime(sheet['date'].values)
    sheet.drop('date', axis=1, inplace=True)

    earlyVals = ~(met['datetime'] <= sheet['datetime'][0])  # True for met rows after the first concentration timestamp
    met = met[earlyVals]                                     # keep only those rows
    met.reset_index(drop=True, inplace=True)
    met.drop(['steady'], axis=1, inplace=True)

    # merge the met data onto the concentration data by finding the nearest datetime within an hour
    sheet.dropna(axis=0, how='any', inplace=True)
    picarro = pd.merge_asof(sheet.sort_values('datetime'), met,
                            on='datetime', direction='nearest',
                            tolerance=pd.Timedelta('1 hour'))
    picarro.dropna(axis=0, how='any', inplace=True)

    return picarro
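# Illustrative usage sketch (not part of the original script): picarroMetCombo expects a
# whitespace-delimited file with two columns -- decimal year and mixing ratio -- and returns
# the concentration data merged with the nearest met record within one hour. The path below
# is hypothetical; point it at a real Picarro export before running.
if __name__ == '__main__':
    picarro_example = picarroMetCombo(r'C:\path\to\picarro_ch4.txt')
    print(picarro_example[['datetime', 'value', 'dir']].head())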
def ratios():
    # import data sets
    root = r'C:\Users\ARL\Desktop\J_Summit\analyses\Data'
    ethane = pd.read_csv(root + r'\ethaneFIT.txt', delim_whitespace=True, error_bad_lines=False, header=None)
    ace = pd.read_csv(root + r'\acetyleneFIT.txt', delim_whitespace=True, error_bad_lines=False, header=None)
    methane = pd.read_csv(root + r'\methane.txt', delim_whitespace=True, error_bad_lines=False, header=None)
    cols = ['DecYear', 'val', 'func', 'resid']  # column names

    # cleaning up data
    for sheet in [ethane, ace, methane]:
        # reassign col names
        if np.logical_or(sheet.iloc[0][0] == ethane.iloc[0][0], sheet.iloc[0][0] == ace.iloc[0][0]):
            sheet.columns = cols
            sheet.drop(['func', 'resid'], axis=1, inplace=True)  # drop misc cols
        else:
            sheet.columns = cols + ['smooth']
            sheet.drop(['func', 'resid', 'smooth'], axis=1, inplace=True)

        sheet.drop(sheet[sheet['DecYear'] < 2012].index, inplace=True)  # remove pre 2012 vals in place
        sheet.dropna(axis=0, how='any', inplace=True)                   # remove NaN rows

    # create ratios
    tolerance = 3  # tolerance in hours
    ethane.name = 'ethane'
    ace.name = 'ace'

    for sheet in [ethane, ace]:
        ratiosheet, datesheet = ratioCreator(tolerance, sheet, methane)
        datesheet = decToDatetime(datesheet)

        df = pd.DataFrame(columns=['datetime', 'val'])
        df['datetime'], df['val'] = datesheet, ratiosheet
        df = noaaDateConv(df)
        df.to_csv(f'{sheet.name}Ratio.txt', header=None, index=None, sep=' ', mode='w+')
def methane():
    # import original dataset and new datasets
    methanePrev = loadExcel(r"C:\Users\ARL\Desktop\Jashan\Summit\analyses\Data\Methane.xlsx")
    methane2018 = loadExcel(r'C:\Users\ARL\Desktop\SUM_CH4_insitu_2018.xlsx')
    methane2019 = loadExcel(r'C:\Users\ARL\Desktop\Summit_GC_2019\CH4_results\SUM_CH4_insitu_2019.xlsx')

    # identify column names we want to keep
    goodcol = ['Decimal Year', 'Run median']                       # good columns
    badcol = [x for x in methane2018.columns if x not in goodcol]  # bad columns
    newnames = ['DecYear', 'MR']

    for sheet in [methane2018, methane2019]:
        sheet.drop(badcol, axis=1, inplace=True)       # drop bad columns
        sheet.dropna(how='any', axis=0, inplace=True)  # drop NaN rows
        sheet.columns = newnames                       # assign same col names

    methanePrev = methanePrev[methanePrev['DecYear'] < 2018]  # keep only pre 2018 vals
    comb = [methanePrev, methane2018, methane2019]            # create combination frame
    methaneFinal = pd.concat(comb)                            # concat

    # trim extreme outliers
    values = methaneFinal['MR'].values
    z = np.abs(stats.zscore(values))
    thresh = 5
    methaneFinal = methaneFinal[~(z > thresh)]

    dates = decToDatetime(methaneFinal['DecYear'].values)  # conv to datetime
    methaneFinal['datetime'] = dates                       # add to dataframe

    noaaMethane = pd.DataFrame(columns=['datetime', 'MR'])  # noaa version
    noaaMethane['datetime'], noaaMethane['MR'] = dates, methaneFinal['MR'].values
    noaaMethane = noaaDateConv(noaaMethane)
    noaaMethane.to_csv('methane2019updated.txt', header=None, index=None, sep=' ', mode='w+')

    return methaneFinal
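# Minimal sketch (not from the original module) of the |z| > 5 outlier trim used in methane(),
# run on synthetic data so the numbers are easy to verify by hand. Only the threshold of 5
# mirrors the function above; the values themselves are made up.
if __name__ == '__main__':
    import numpy as np
    from scipy import stats

    demo = np.full(200, 1850.0)             # flat baseline of synthetic mixing ratios
    demo[100] = 1900.0                      # one injected spike
    demo_z = np.abs(stats.zscore(demo))     # ~14 for the spike, ~0.07 everywhere else
    demo_trimmed = demo[~(demo_z > 5)]      # same boolean-index trim as methaneFinal above
    print(len(demo), '->', len(demo_trimmed))  # 200 -> 199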
def fireTrack():
    # import alternate data
    root = r'C:\Users\ARL\Desktop\Summit\analyses\Data'
    ace = readCsv(root + '\\' + r'aceRatioNoaa.txt')

    # import fire data
    virrs = True
    root = r'C:\Users\ARL\Desktop\FireData'
    if virrs:
        fire = pd.read_csv(root + r'\fire_archive_V1_60132.csv')
    else:
        fire = pd.read_csv(root + r'\fire_archive_M6_60131.csv')

    # data trimming, reassign headers, add datetime column
    header = ['decyear', 'value', 'function', 'resid', 'residsmooth']
    ace.columns = header
    ace = ace[ace['value'] >= 0.00000001]
    ace['datetime'] = decToDatetime(ace['decyear'].values)
    ace['normResid'] = ace['resid'].values / ace['value'].values

    # combine fire and other dataset to produce master dataframe for analysis
    master = fireCombo(fire, ace, VIRRS=virrs)

    # identify average z score
    avg_vals = np.average(master['value_z'].values)
    avg_norms = np.average(master['normed_z'].values)
    print(f'The average z score in values is {avg_vals}')
    print(f'The average z score in normalized residuals is {avg_norms}')

    mybounds = {'x': (-73.2, -9.4), 'y': (57.8, 84.3)}

    # scatterplot mapping
    img = mpimg.imread(root + r'\greenland.PNG')
    if virrs:
        master.plot(kind='scatter', x='longitude', y='latitude', c='bright_ti4',
                    cmap=plt.get_cmap('magma_r'), colorbar=True, figsize=(10, 7))
    else:
        master.plot(kind='scatter', x='longitude', y='latitude', c='brightness',
                    cmap=plt.get_cmap('magma_r'), colorbar=True, figsize=(10, 7))

    plt.imshow(img, extent=[mybounds['x'][0], mybounds['x'][1],
                            mybounds['y'][0], mybounds['y'][1]], alpha=0.5)
    plt.xlabel('Longitude', fontsize=14)
    plt.ylabel('Latitude', fontsize=14)

    if virrs:
        plt.title('NASA VIIRS Fire Count Overlay on Greenland')
    else:
        plt.title('NASA MODIS Fire Count Overlay on Greenland')

    plt.legend()
    plt.show()
def ch4plot():
    header = ['yr', 'value', 'function', 'resid', 'residLine']  # dataframe headers
    root = r'C:\Users\ARL\Desktop\J_Summit\analyses\Data'
    filepath = root + '\\' + 'methane.txt'
    data = readCsv(filepath)
    data.columns = header
    register_matplotlib_converters()

    # convert the dec year col to datetime
    dates = decToDatetime(data['yr'])
    data['datetime'] = dates
    data.drop('yr', axis=1, inplace=True)

    # y bounds
    values = data['value']
    mean = np.mean(values)
    lowV = min(values) - (mean / 100)  # arbitrary vals look ok
    highV = max(values) + (mean / 100)

    mean = np.mean(data['resid'].values)
    lowR = min(data['resid']) - (mean / 3)
    highR = max(data['resid']) + (mean / 3)

    # x bounds
    low = min(data['datetime']) - dt.timedelta(days=30)
    high = max(data['datetime']) + dt.timedelta(days=30)

    # plotting
    sns.set()                                        # setup
    f, ax = plt.subplots(nrows=2, figsize=(12, 8))   # 2 row subplot
    sns.despine(f)
    plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.3, hspace=0.5)

    # background data values with fitted harmonic functions
    ax1 = sns.scatterplot(x='datetime', y='value', data=data, ax=ax[0],
                          alpha=0.7, s=10, legend='brief', label='GC Data')
    ax2 = sns.lineplot(x='datetime', y='function', data=data, ax=ax[0],
                       linewidth=2, label='Fitted Curve')
    ax1.set_title('GC Methane Data with Fitted Function')
    ax1.set_xlabel('Date')
    ax1.set_ylabel('Mixing Ratio [ppb]')
    ax1.set(xlim=(low, high))
    ax1.set(ylim=(lowV, highV))
    ax1.get_lines()[0].set_color('#00b386')
    ax1.legend()

    # residual data
    ax3 = sns.scatterplot(x='datetime', y='resid', data=data, ax=ax[1],
                          alpha=1, s=10, legend='brief', label='Residuals from Fit')
    ax4 = sns.lineplot(x='datetime', y='residLine', data=data, ax=ax[1],
                       linewidth=2, label='Fitted Residual Curve')
    ax3.set_title('GC Residuals from Fitted Function')
    ax3.set_xlabel('Date')
    ax3.set_ylabel('Mixing Ratio [ppb]')
    ax4.get_lines()[0].set_color('#00b386')
    ax3.legend()
    ax3.set(xlim=(low, high))
    ax3.set(ylim=(lowR, highR))

    # save the plots
    direc = r'C:\Users\ARL\Desktop\J_Summit\analyses\Figures' + '\\' + 'methane.png'
    f.savefig(direc, format='png')
def plotratios(hours, ethane=True, all=True, summer=True, viirs=True):
    """
    plotratios imports either the acetylene/methane or the ethane/methane ratio data and plots
    its back trajectories; various conditions can be set.

    :param hours: number of back trajectory hours run with HYSPLIT, used for plot titles
    :param ethane: default True. Set to False for acetylene data.
    :param all: default True, uses all data. Set to False to cut z scores below 3.
    :param summer: default True, cuts winter data. Set to False to use only winter data and cut summer data.
    :param viirs: default True, uses VIIRS fire data. Set to False to use MODIS C6 data.
    :return: nothing, displays plot with plt.show()
    """
    # Create titles and set data path depending on options
    dataroot = r'C:\Users\ARL\Desktop\Jashan\Summit\analyses\Data'  # data directory
    trajroot = r'C:\Users\ARL\Desktop\Jashan\Jashan PySplit\pysplitprocessor-master\pysplitprocessor\messeduptime_notUTC'

    if ethane:
        if all:
            root = os.path.join(trajroot, 'ethane_methane_all')
            title = f'{hours}h Back Trajectories of Ethane/Methane Ratio, 2012-2019'
            sheet = readCsv(dataroot + r'\ethaneRatioNoaa.txt')
        else:
            title = f'{hours}h Back Trajectories of Ethane/Methane Ratio Outliers, 2012-2019'
            sheet = readCsv(dataroot + r'\ethaneRatioNoaa.txt')
    else:
        if all:
            root = r'C:\Users\ARL\Desktop\Jashan\Jashan ' \
                   r'PySplit\pysplitprocessor-master\pysplitprocessor\aceTraj'
            title = f'{hours}h Back Trajectories of Acetylene/Methane Ratio, 2012-2018'
            sheet = readCsv(dataroot + r'\aceRatioNoaa.txt')
        else:
            title = f'{hours}h Back Trajectories of Acetylene/Methane Ratio Outliers, 2012-2019'
            root = r'C:\Users\ARL\Desktop\Jashan ' \
                   r'PySplit\pysplitprocessor-master\pysplitprocessor\ace_methane_traj_highz'
            sheet = readCsv(dataroot + r'\aceRatioNoaa.txt')

    header = ['decyear', 'value', 'function', 'resid', 'residsmooth']  # create header
    sheet.columns = header                                             # assign column names
    sheet = sheet[sheet['value'] >= 0.000001]                          # remove zero values
    sheet['datetime'] = decToDatetime(sheet['decyear'].values)         # create datetimes from decyear
    sheet['datetime'] = sheet['datetime'] + pd.Timedelta('3 hours')    # convert tz to UTC

    dates = sheet['datetime'].tolist()  # put datetimes in list
    julian = []                         # preallocate
    for d in dates:                     # loop over each date
        tt = d.timetuple()              # create a timetuple from date
        jul = tt.tm_yday                # get the julian day
        julian.append(jul)              # append that to a list
    sheet['julian'] = julian            # add to dataframe

    cutoffs = (120, 305)
    if summer:
        keep = np.logical_and(sheet['julian'] >= cutoffs[0],       # find just summer values
                              sheet['julian'] <= cutoffs[1])
        print('-- Winter Data Removed')
    else:
        keep = ~(np.logical_and(sheet['julian'] >= cutoffs[0],     # find just winter values
                                sheet['julian'] <= cutoffs[1]))
        print('-- Summer Data Removed')
    sheet = sheet[keep]

    dropcols = ['decyear', 'function', 'residsmooth']  # columns to drop
    sheet.drop(dropcols, axis=1, inplace=True)         # drop unused columns

    # remove slow data or data above 342, below 72 degrees at Summit camp due to possible pollution
    sheetClean = metRemove(sheet, 1, dropMet=True)

    residuals = sheetClean['resid'].values  # numpy array of resid
    z = np.abs(stats.zscore(residuals))     # calculate z scores
    sheetClean['zscores'] = z               # assign as column

    if all:
        thresh = 0  # z score threshold
    else:
        thresh = 3
    sheetZ = sheetClean[z > thresh]  # remove non outliers
    sheetZ.reset_index(drop=True, inplace=True)

    trajPlot(root, title=title, zscores=sheetZ, viirs=viirs, summer=summer)
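# Illustrative call (a sketch; the argument values are assumptions rather than the original
# driver settings): plot 72-hour back trajectories for the full ethane/methane ratio record,
# summer data only, overlaid with VIIRS fire detections.
if __name__ == '__main__':
    plotratios(72, ethane=True, all=True, summer=True, viirs=True)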
from metRemove import metRemove
from scipy import stats

hours = 72
title = f'{hours}h Back Trajectories of Acetylene/Methane Ratio Outliers, 2012-2018'
root = r'C:\Users\ARL\Desktop\Jashan PySplit\pysplitprocessor-master\pysplitprocessor\ace_methane_traj'
dataroot = r'C:\Users\ARL\Desktop\Summit\analyses\Data'  # data directory

ace = readCsv(dataroot + r'\aceRatioNoaa.txt')  # data read in acetylene
header = ['decyear', 'value', 'function', 'resid', 'residsmooth']  # assign column names
ace.columns = header
ace = ace[ace['value'] >= 0.00000001]
ace['datetime'] = decToDatetime(ace['decyear'].values)  # create datetimes from decyear

dates = ace['datetime'].tolist()  # put datetimes in a list
julian = []                       # preallocate julian day list
for d in dates:                   # loop over each date
    tt = d.timetuple()            # create a timetuple
    jul = tt.tm_yday              # identify julian day
    julian.append(jul)            # append to list
ace['julian'] = julian            # add to dataframe

cutoffs = (120, 305)              # identify julian cutoffs
keep = np.logical_and(ace['julian'] >= cutoffs[0],  # create boolean and array
                      ace['julian'] <= cutoffs[1])
ace = ace[keep]                   # boolean index to remove winter
def nmhc():
    start = time.time()

    # import original data set and new datasets
    homedir = r'C:\Users\ARL\Desktop\Jashan\SummitWildfireTracers'
    root = os.path.join(homedir, 'Data')
    nmhcPrev = loadExcel(os.path.join(root, 'NMHC.xlsx'))
    nmhc2018 = loadExcel(r'C:\Users\ARL\Desktop\Ambient_2018_V2.xlsx')
    nmhc2019 = loadExcel(r'C:\Users\ARL\Desktop\Summit_GC_2019\NMHC_results\Ambient_2019.xlsx')

    # identify the mixing ratio rows
    allrows = list(range(0, len(nmhc2018.index)))
    rowstokeep = list(range(70, 94))
    rowstodrop = [x for x in allrows if x not in rowstokeep]

    # drop rows from nmhc2018 and nmhc2019
    nmhc2018 = nmhc2018.drop(rowstodrop, axis=0)
    nmhc2019 = nmhc2019.drop(rowstodrop, axis=0)

    # drop unnecessary columns, then rows where all remaining values are NaN
    dropcols = ['Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3']
    nmhc2018, nmhc2019 = nmhc2018.drop(dropcols, axis=1), nmhc2019.drop(dropcols, axis=1)
    nmhc2018 = nmhc2018.dropna(axis=0, how='all',
                               subset=[x for x in nmhc2018.columns if x not in ['Unnamed: 0']])
    nmhc2019 = nmhc2019.dropna(axis=0, how='all',
                               subset=[x for x in nmhc2019.columns if x not in ['Unnamed: 0']])

    # transpose, reset columns, drop first row and last row
    nmhc2018, nmhc2019 = nmhc2018.T.reset_index(), nmhc2019.T.reset_index()
    nmhc2018.columns, nmhc2019.columns = list(nmhc2018.loc[0]), list(nmhc2019.loc[0])
    nmhc2018 = nmhc2018.drop([0, len(nmhc2018) - 1], axis=0)
    nmhc2019 = nmhc2019.drop([0, len(nmhc2019) - 1], axis=0)

    end = time.time()
    print('transposed in ', end - start)

    # create datetime column for each dataframe
    for yr in [nmhc2018, nmhc2019]:
        datetime = []
        sampledate = yr['Unnamed: 0'][1]
        yearstr = str(sampledate)[:4]
        yearint = int(yearstr)  # gets the year
        for x in yr[f'Decimal Day of Year {yearstr}']:
            datetime.append(decToDatetime(x))  # call decyear conv
        yr['datetime'] = datetime

    # create datetime column for past data
    datetime = []
    for x in nmhcPrev['DecYear']:
        datetime.append(decToDatetime(x))
    nmhcPrev['datetime'] = datetime

    # remove old unneeded date columns
    for yr in [nmhc2018, nmhc2019]:
        sampledate = yr['Unnamed: 0'][1]
        yearstr = str(sampledate)[:4]
        badcols = ['Day', 'Hour', 'Minute', 'Unnamed: 0', f'Decimal Day of Year {yearstr}']
        yr.drop(badcols, axis=1, inplace=True)

    badcols = ['DecYear', 'DOY', 'Ignore']
    nmhcPrev.drop(badcols, axis=1, inplace=True)

    end = time.time()
    print('datetimes created in ', end - start)

    # combine all datasets into one dataframe
    nmhcPrev = nmhcPrev[nmhcPrev['datetime'] < dt.datetime(2018, 1, 1)]  # remove 2018
    nmhcPrev = nmhcPrev.append(nmhc2018)                                 # add all 2018
    nmhcPrev = nmhcPrev.append(nmhc2019)                                 # add all 2019

    end = time.time()
    print('datasets combined in ', end - start)

    # create textfiles for each NMHC
    compounds = ['ethane', 'ethene', 'propane', 'propene', 'i-butane', 'acetylene',
                 'n-butane', 'i-pentane', 'n-pentane', 'hexane', 'Benzene', 'Toluene']

    for cpd in compounds:
        values = nmhcPrev[cpd]           # get the specific cpd
        dates = nmhcPrev['datetime']     # get the specific datetimes

        final = pd.concat([dates, values], axis=1)
        final = final.dropna(axis=0, how='any')                     # drop the NaNs
        final = final[final['datetime'] > dt.datetime(2011, 1, 1)]  # remove early values because of data gap
        final = noaaDateConv(final)                                 # conv date formats
        final.to_csv(f'{cpd}.txt', header=None, index=None, sep=' ', mode='w+')
        print(f'{cpd} file written')

    print('All Files Done')
def ratioPlot():
    register_matplotlib_converters()

    # import data
    homedir = r'C:\Users\ARL\Desktop\Jashan\SummitWildfireTracers'
    root = os.path.join(homedir, 'Data')
    ethane = readCsv(root + r'\ethaneRatioNoaa.txt')
    ace = readCsv(root + r'\aceRatioNoaa.txt')

    # data trimming, reassign headers, add datetime column
    header = ['decyear', 'value', 'function', 'resid', 'residsmooth']
    for sheet in [ethane, ace]:
        sheet.columns = header

    ethane = ethane[ethane['value'] >= 0.0000001]
    ace = ace[ace['value'] >= 0.00000001]
    ethane.name = 'Ethane'
    ace.name = 'Acetylene'

    for sheet in [ethane, ace]:
        sheet['datetime'] = decToDatetime(sheet['decyear'].values)
        if sheet.name == 'Ethane':
            ethane = sheet
        else:
            ace = sheet

        # plotting
        sns.set()
        f, ax = plt.subplots(nrows=3, figsize=(12, 8))
        sns.despine(f)
        plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.8)

        ax1 = sns.scatterplot(x='datetime', y='value', data=sheet, alpha=0.7,
                              label='Original Data', ax=ax[0])
        ax2 = sns.lineplot(x='datetime', y='function', data=sheet, linewidth=2,
                           label='Fitted Function', ax=ax[0])
        ax1.set_title(sheet.name + ' / Methane Ratio', size=26)
        ax1.set_xlabel('Datetime', fontsize=22)
        ax1.set_ylabel('Ratio Value', fontsize=18)
        ax1.set(xlim=((min(sheet['datetime']) - dt.timedelta(days=10)),
                      (max(sheet['datetime']) + dt.timedelta(days=10))))
        ax1.set(ylim=(min(sheet['value']) - np.mean(sheet['value'] / 3),
                      max(sheet['value']) + np.mean(sheet['value'] / 3)))
        ax2.get_lines()[0].set_color('purple')
        ax1.legend(prop={'size': 14})

        ax3 = sns.scatterplot(x='datetime', y='resid', data=sheet, alpha=0.7,
                              label='Residuals', ax=ax[1])
        ax4 = sns.lineplot(x='datetime', y='residsmooth', data=sheet, linewidth=2,
                           label='Smoothed Residual Fit', ax=ax[1])
        ax4.get_lines()[0].set_color('purple')
        ax3.set_title('Residuals in ' + sheet.name, size=26)
        ax3.set_xlabel('Datetime', fontsize=22)
        ax3.set_ylabel('Residual / Value', fontsize=18)
        ax3.set(xlim=((min(sheet['datetime']) - dt.timedelta(days=10)),
                      (max(sheet['datetime']) + dt.timedelta(days=10))))
        ax3.set(ylim=(np.mean(sheet['resid']) - np.std(sheet['resid']) * 8,
                      np.mean(sheet['resid']) + np.std(sheet['resid']) * 8))
        ax3.legend(prop={'size': 14})

        # day of year plot residuals
        doy = []
        for x in sheet['datetime']:
            tt = x.timetuple()
            doy.append(tt.tm_yday)
        sheet['DOY'] = doy

        ax5 = sns.scatterplot(x='DOY', y='resid', data=sheet, alpha=0.7,
                              label='Residuals', ax=ax[2])
        ax5.set_title('Residuals by Julian Day', size=26)
        ax5.set_xlabel('Day of Year', fontsize=22)
        ax5.set_ylabel('Residual / Value', fontsize=18)
        ax5.set(xlim=((min(sheet['DOY'])), (max(sheet['DOY']))))
        ax5.set(ylim=(np.mean(sheet['resid']) - np.std(sheet['resid']) * 8,
                      np.mean(sheet['resid']) + np.std(sheet['resid']) * 8))
        ax5.legend(prop={'size': 14})

        direc = os.path.join(homedir, 'Figures') + '\\' + sheet.name + 'Ratio.png'
        f.savefig(direc, format='png')

        for ax in [ax1, ax2, ax3, ax4, ax5]:
            ax.tick_params(labelsize=18)
        matplotlib.rc("legend", fontsize=26)

    # plotting separate heatmap
    sns.set(style="white", font_scale=1.5)
    sns.despine()

    combo = pd.merge_asof(ethane, ace, on='datetime', direction='nearest')
    combo.dropna(axis=0, inplace=True, how='any')
    combo.drop(combo.index[5586:5776], axis=0, inplace=True)

    x = np.array(combo['resid_x']).reshape((-1, 1))
    y = np.array(combo['resid_y'])
    model = LinearRegression().fit(x, y)  # create linear regression fit
    rSquared = model.score(x, y)          # assign coeff of determination
    slope = model.coef_                   # assign slope

    g = sns.jointplot(combo['resid_x'], combo['resid_y'], kind='reg', color='#e65c00',
                      line_kws={'label': 'rSquared: {:1.5f}\n Slope: {:1.5f}\n'.format(rSquared, slope[0])})
    g.set_axis_labels('Ethane/Methane Ratio', 'Acetylene/Methane Ratio', fontsize=20)
    plt.tick_params(axis='both', labelsize=18)
    g.fig.suptitle('Correlation between Ethane and Acetylene Ratio Residuals', fontsize=28)
    g.ax_joint.get_lines()[0].set_color('blue')
    plt.legend()
    plt.show()
def ratioPlot():
    register_matplotlib_converters()

    # import data
    root = r'C:\Users\ARL\Desktop\J_Summit\analyses\Data'
    ethane = readCsv(root + r'\ethaneRatioNoaa.txt')
    ace = readCsv(root + r'\aceRatioNoaa.txt')

    # data trimming, reassign headers, add datetime column
    header = ['decyear', 'value', 'function', 'resid', 'residsmooth']
    for sheet in [ethane, ace]:
        sheet.columns = header

    ethane = ethane[ethane['value'] >= 0.0000001]
    ace = ace[ace['value'] >= 0.00000001]
    ethane.name = 'Ethane'
    ace.name = 'Acetylene'

    for sheet in [ethane, ace]:
        sheet['datetime'] = decToDatetime(sheet['decyear'].values)

        normResid = sheet['resid'].values / sheet['value'].values
        normSmooth = sheet['residsmooth'].values / sheet['value'].values
        sheet.drop(['resid', 'residsmooth'], axis=1, inplace=True)
        sheet['resid'] = normResid
        sheet['residsmooth'] = normSmooth

        if sheet.name == 'Ethane':
            ethane = sheet
        else:
            ace = sheet

        # plotting
        sns.set()
        f, ax = plt.subplots(nrows=3, figsize=(12, 8))
        sns.despine(f)
        plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.8)

        ax1 = sns.scatterplot(x='datetime', y='value', data=sheet, alpha=0.7,
                              label='Original Data', ax=ax[0])
        ax2 = sns.lineplot(x='datetime', y='function', data=sheet, linewidth=2,
                           label='Fitted Function', ax=ax[0])
        ax1.set_title(sheet.name + ' / Methane Ratio')
        ax1.set_xlabel('Datetime')
        ax1.set_ylabel('Mixing Ratio [ppb]')
        ax1.set(xlim=((min(sheet['datetime']) - dt.timedelta(days=10)),
                      (max(sheet['datetime']) + dt.timedelta(days=10))))
        ax1.set(ylim=(min(sheet['value']) - np.mean(sheet['value'] / 3),
                      max(sheet['value']) + np.mean(sheet['value'] / 3)))
        ax2.get_lines()[0].set_color('purple')
        ax1.legend()

        ax3 = sns.scatterplot(x='datetime', y='resid', data=sheet, alpha=0.7,
                              label='Normalized Residuals', ax=ax[1])
        ax4 = sns.lineplot(x='datetime', y='residsmooth', data=sheet, linewidth=2,
                           label='Smoothed Residual Fit', ax=ax[1])
        ax4.get_lines()[0].set_color('purple')
        ax3.set_title('Normalized Residuals in ' + sheet.name)
        ax3.set_xlabel('Datetime')
        ax3.set_ylabel('Mixing Ratio [ppb]')
        ax3.set(xlim=((min(sheet['datetime']) - dt.timedelta(days=10)),
                      (max(sheet['datetime']) + dt.timedelta(days=10))))
        ax3.set(ylim=(np.mean(sheet['resid']) - np.std(sheet['resid']) * 8,
                      np.mean(sheet['resid']) + np.std(sheet['resid']) * 8))
        ax3.legend()

        # day of year plot residuals
        doy = []
        for x in sheet['datetime']:
            tt = x.timetuple()
            doy.append(tt.tm_yday)
        sheet['DOY'] = doy

        ax5 = sns.scatterplot(x='DOY', y='resid', data=sheet, alpha=0.7,
                              label='Normalized Residuals', ax=ax[2])
        ax5.set_title('Normalized Residuals by Julian Day')
        ax5.set_xlabel('Day of Year')
        ax5.set_ylabel('Mixing Ratio [ppb]')
        ax5.set(xlim=((min(sheet['DOY'])), (max(sheet['DOY']))))
        ax5.set(ylim=(np.mean(sheet['resid']) - np.std(sheet['resid']) * 8,
                      np.mean(sheet['resid']) + np.std(sheet['resid']) * 8))
        ax5.legend()

        direc = r'C:\Users\ARL\Desktop\J_Summit\analyses\Figures' + '\\' + sheet.name + 'Ratio.png'
        f.savefig(direc, format='png')

    # plotting separate heatmap
    sns.set(style="white")
    sns.despine()

    combo = pd.merge_asof(ethane, ace, on='datetime', direction='nearest')
    combo = combo[combo['resid_y'] > -5]

    x = np.array(combo['resid_x']).reshape((-1, 1))
    y = np.array(combo['resid_y'])
    model = LinearRegression().fit(x, y)  # create linear regression fit
    rSquared = model.score(x, y)          # assign coeff of determination
    slope = model.coef_                   # assign slope

    g = sns.jointplot(combo['resid_x'], combo['resid_y'], kind='reg', color='#e65c00',
                      line_kws={'label': 'rSquared: {:1.5f}\n Slope: {:1.5f}\n'.format(rSquared, slope[0])})
    g.set_axis_labels('Ethane MR [ppb]', 'Acetylene MR [ppb]', fontsize=12)
    g.fig.suptitle('Correlation between Ethane and Acetylene Normalized Residuals')
    g.ax_joint.get_lines()[0].set_color('blue')
    plt.legend()
    plt.show()
def ratios():
    """ This function creates the ethane/methane and acetylene/methane ratios """

    # import data sets
    root = r'C:\Users\ARL\Desktop\Jashan\Summit\analyses\Data'
    ethane = pd.read_csv(root + r'\ethaneFIT.txt', delim_whitespace=True, error_bad_lines=False, header=None)
    ace = pd.read_csv(root + r'\acetyleneFIT.txt', delim_whitespace=True, error_bad_lines=False, header=None)
    methane = pd.read_csv(root + r'\methane.txt', delim_whitespace=True, error_bad_lines=False, header=None)
    cols = ['DecYear', 'val', 'func', 'resid']  # column names

    # cleaning up data
    for sheet in [ethane, ace, methane]:
        # reassign col names
        if np.logical_or(sheet.iloc[0][0] == ethane.iloc[0][0], sheet.iloc[0][0] == ace.iloc[0][0]):
            sheet.columns = cols
            sheet.drop(['func', 'resid'], axis=1, inplace=True)  # drop misc cols
        else:
            sheet.columns = cols + ['smooth']
            sheet.drop(['func', 'resid', 'smooth'], axis=1, inplace=True)

        sheet.drop(sheet[sheet['DecYear'] < 2012].index, inplace=True)  # remove pre 2012 vals in place
        sheet.dropna(axis=0, how='any', inplace=True)                   # remove NaN rows

    # create ratios
    tolerance = 3                          # tolerance in hours
    tolerance = ((tolerance / 24) / 365)   # convert tolerance from hours to decimal years

    ethane.name = 'ethane'
    ace.name = 'ace'

    for sheet in [ethane, ace]:
        combinedsheet = pd.merge_asof(sheet.sort_values('DecYear'), methane.sort_values('DecYear'),
                                      on='DecYear', tolerance=tolerance, direction='nearest')

        datesheet = decToDatetime(combinedsheet['DecYear'].tolist())
        combinedsheet['datetime'] = datesheet
        combinedsheet.columns = ['DecYear', f'{sheet.name}', 'methane', 'datetime']

        ratio = combinedsheet[f'{sheet.name}'] / combinedsheet['methane']
        combinedsheet.drop(['DecYear', f'{sheet.name}', 'methane'], axis=1, inplace=True)
        combinedsheet['ratio'] = ratio
        combinedsheet.dropna(axis=0, inplace=True, how='any')

        df = noaaDateConv(combinedsheet)
        df.to_csv(f'{sheet.name}Ratio_Aug.txt', header=None, index=None, sep=' ', mode='w+')
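# Worked check of the tolerance conversion used in ratios() above (illustrative arithmetic only):
# three hours expressed in decimal-year units is (3 / 24) / 365 ≈ 0.000342, so merge_asof pairs an
# ethane or acetylene point with a methane point only when their DecYear stamps differ by roughly
# three hours or less; the 365-day divisor ignores leap years.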
header = ['yr', 'value', 'function', 'resid']  # dataframe headers
register_matplotlib_converters()

for cpd in compounds:
    filename = root + '\\' + cpd + 'FIT.txt'  # file ext
    data = readCsv(filename)
    data.columns = header                     # reset column names
    data = data[data['value'] > 0.0]

    normResid = data['resid'].values / data['value'].values
    data.drop(['resid'], axis=1, inplace=True)
    data['resid'] = normResid

    dates = decToDatetime(data['yr'].values)  # call conv function
    data['datetime'] = dates                  # assign to DF
    data.drop('yr', axis=1, inplace=True)

    # trim a few extreme outliers
    values = data['value'].values             # get the value col
    z = np.abs(stats.zscore(values))          # get the z score
    thresh = 2                                # z score threshold
    data = data[~(z > thresh)]                # boolean index

    resids = data['resid'].values             # same thing but trim resid
    z = np.abs(stats.zscore(resids))
    thresh = 5
    data = data[~(z > thresh)]

    # y bounds
def windRoseMethane():
    # ---- import data
    root = r'C:\Users\ARL\Desktop\J_Summit\analyses\Data'
    met = metTrim()
    arl = metCombo(root + r'\methane2019updated.txt')
    arl.dropna(axis=0, how='any', inplace=True)
    pic = pd.read_csv(root + r'\picarro_ch4.txt', delim_whitespace=True, encoding='utf8',
                      error_bad_lines=False, header=None)
    flask = pd.read_csv(root + r'\flask_ch4.txt', delim_whitespace=True,
                        error_bad_lines=False, header=None)

    # ---- combining datasets (met and picarro)
    pic.columns = ['date', 'value']
    pic['datetime'] = decToDatetime(pic['date'].values)
    pic.drop('date', axis=1, inplace=True)
    met.drop(['steady'], axis=1, inplace=True)

    # merge the met data onto the concentration data by finding the nearest datetime within an hour
    pic.dropna(axis=0, how='any', inplace=True)
    picarro = pd.merge_asof(pic.sort_values('datetime'), met,
                            on='datetime', direction='nearest',
                            tolerance=pd.Timedelta('1 hour'))
    picarro.dropna(axis=0, how='any', inplace=True)

    # ---- combining datasets (flask and met)
    met = metTrim()
    colnames = ['yr', 'mo', 'dy', 'hr', 'val']
    flask.columns = colnames
    flask['datetime'] = createDatetime(flask['yr'], flask['mo'], flask['dy'], flask['hr'])
    flask.drop(['yr', 'mo', 'dy', 'hr'], axis=1, inplace=True)

    earlyVals = (met['datetime'] <= flask['datetime'][0])
    met.drop(['steady'], axis=1, inplace=True)

    # merge the met data onto the concentration data by finding the nearest datetime within an hour
    flaskMet = pd.merge_asof(flask, met, on='datetime', direction='nearest',
                             tolerance=pd.Timedelta('1 hour'))
    flaskMet.dropna(axis=0, how='any', inplace=True)

    # ---- plotting
    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, subplot_kw=dict(projection='windrose'))
    fig.suptitle('Methane Conc. at Summit by Wind Direction', fontsize=16)
    plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.2, hspace=-0.3)

    # setup GC methane windrose
    ax1.bar(arl['dir'].values, arl['val'].values, normed=False, opening=0.9, edgecolor='black',
            nsector=24, bins=14, cmap=cm.viridis_r, blowto=False)
    ax1.set_title('GCFID Methane Conc. [ppb]\n')
    ax1.set_legend(loc=8, fancybox=True, shadow=True, bbox_to_anchor=(0.5, -1.05))

    # setup picarro methane windrose
    ax2.bar(picarro['dir'].values, picarro['value'].values, normed=False, opening=0.9, edgecolor='black',
            nsector=24, bins=14, cmap=cm.viridis_r, blowto=False)
    ax2.set_title('Picarro Methane Conc. [ppb]\n')
    ax2.set_legend(loc=8, fancybox=True, shadow=True, bbox_to_anchor=(0.5, -1.05))

    # setup flask methane windrose
    ax3.bar(flaskMet['dir'].values, flaskMet['val'].values, normed=False, opening=0.9, edgecolor='black',
            nsector=24, bins=14, cmap=cm.viridis_r, blowto=False)
    ax3.set_title('Flask Methane Conc. [ppb]\n')
    ax3.set_legend(loc=8, fancybox=True, shadow=True, bbox_to_anchor=(0.5, -1.05))

    plt.show()