Currently, a few data cleaning tactics are employed. First, since there is no methane data past 2012, the VOC data before 2012 is cut. We only look at ethane and acetylene in this circumstance, so other VOC columns are removed. Any row with a NaN value is completely dropped. Since methane is sampled more frequently than the VOC's, methane values from +/- three hours from any given VOC data point are averaged and then used to calculate the ratio of VOC / ch4 at any given VOC datapoint timestep. """ # Import libraries from fileInput import fileLoad import pandas as pd import numpy as np import matplotlib.pyplot as plt # Import Data Sets from Excel nmhcData = fileLoad(r"C:\Users\ARL\Desktop\Python Code\Data\NMHC.xlsx") methaneData = fileLoad(r"C:\Users\ARL\Desktop\Python Code\Data\Methane.xlsx") # Cleaning Up Data nmhcData = nmhcData[nmhcData['DecYear'] > 2012] # Only need years past 2012 in VOC Data reqRows = ['DecYear', 'ethane', 'acetylene'] # only need date, ethane, and acetylene nmhcData = nmhcData[reqRows] # just get required rows methaneData = methaneData.dropna( axis=0, how='any') # Remove NaN values, entire row is removed nmhcData = nmhcData.dropna(axis=0, how='any') # Define preliminary variables years = np.linspace(2012, 2018, num=((2018 - 2012) + 1)) # Array of Years numYears = np.size(years) # Total number of years
def ratioCalc(nmhcData=None, methaneData=None, *, startYear=2012,
              endYear=2018, maxSamples=1033):
    """Compute per-year ethane/methane and acetylene/methane ratios.

    For every NMHC sample, the methane values recorded within +/- three
    hours of that sample are averaged, and the sample's ethane and
    acetylene values are each divided by that average.

    Parameters
    ----------
    nmhcData : pandas.DataFrame, optional
        Table with 'DecYear', 'ethane' and 'acetylene' columns. When
        omitted it is read with ``fileLoad`` from the NMHC.xlsx path that
        was previously hard-coded.
    methaneData : pandas.DataFrame, optional
        Table with 'DecYear' and 'MR' columns. When omitted it is read
        with ``fileLoad`` from the Methane.xlsx path that was previously
        hard-coded.
    startYear, endYear : int, keyword-only
        First and last calendar year processed (inclusive).
    maxSamples : int, keyword-only
        Number of columns in each returned array, i.e. the largest number
        of NMHC samples a single year may contain.

    Returns
    -------
    ethaneMethane, aceMethane, datesFinal : numpy.ndarray
        Three NaN-padded float arrays of shape
        ``(endYear - startYear + 1, maxSamples)``. Row ``r`` belongs to
        year ``startYear + r``; column ``c`` belongs to the c-th NMHC
        sample of that year. ``datesFinal`` holds the sample time in days
        (see the NOTE(review) in the body for how it is offset per year).

    Raises
    ------
    ValueError
        If ``endYear`` is smaller than ``startYear``.
    IndexError
        If any single year contains more than ``maxSamples`` NMHC samples.

    Notes
    -----
    NaN values in the inputs are not filtered here; they propagate into
    NaN ratios.
    """
    import numpy as np

    # Use the project's helper when it is importable; otherwise fall back
    # to the stdlib equivalent (assumed to agree with it -- TODO confirm).
    try:
        from isleapyear import isleapyear
    except ImportError:
        from calendar import isleap as isleapyear

    if endYear < startYear:
        raise ValueError("endYear must not be smaller than startYear")

    # Only touch the hard-coded workbooks when the caller supplied no data.
    if nmhcData is None or methaneData is None:
        from fileInput import fileLoad
        if nmhcData is None:
            nmhcData = fileLoad(
                r"C:\Users\ARL\Desktop\Python Code\Data\NMHC.xlsx")
        if methaneData is None:
            methaneData = fileLoad(
                r"C:\Users\ARL\Desktop\Python Code\Data\Methane.xlsx")

    years = np.linspace(startYear, endYear, num=(endYear - startYear) + 1)
    nmhcDateAll = nmhcData.loc[:, 'DecYear']  # NMHC sample dates
    ch4Date = methaneData.loc[:, 'DecYear']   # methane sample dates
    hrs3 = 3 * 60 * 60                        # three hours in seconds

    # One row per year, one column per sample; unused cells stay NaN.
    shape = (np.size(years), maxSamples)
    ethaneMethane = np.full(shape, np.nan)
    aceMethane = np.full(shape, np.nan)
    datesFinal = np.full(shape, np.nan)

    for row, year in enumerate(years):
        daysInYear = 365 + isleapyear(year)
        nmhcInYear = (nmhcDateAll >= year) & (nmhcDateAll < (year + 1))
        ch4InYear = (ch4Date >= year) & (ch4Date < (year + 1))

        # Fraction of the year -> seconds since the start of the year (+1).
        nmhcDate = 1 + ((nmhcDateAll.loc[nmhcInYear].values - year)
                        * daysInYear * 24 * 60 * 60)
        methaneDate = 1 + ((ch4Date.loc[ch4InYear].values - year)
                           * daysInYear * 24 * 60 * 60)

        # Compound values for the current year
        ethane = nmhcData.loc[nmhcInYear, 'ethane'].values
        ace = nmhcData.loc[nmhcInYear, 'acetylene'].values
        methane = methaneData.loc[ch4InYear, 'MR'].values

        sampleCount = np.size(nmhcDate)
        if sampleCount > maxSamples:
            raise IndexError(
                "year %d has %d NMHC samples but the output arrays only "
                "hold %d per year" % (int(year), sampleCount, maxSamples))

        # Mean methane within +/- 3 h of each NMHC sample. It is the same
        # for ethane and acetylene, so it is computed once per sample.
        # A sample without any methane reading in its window keeps NaN.
        methaneAverage = np.full(sampleCount, np.nan)
        for n, stamp in enumerate(nmhcDate):
            inWindow = ((methaneDate <= stamp + hrs3)
                        & (methaneDate >= stamp - hrs3))
            window = methane[inWindow]
            if window.size:
                methaneAverage[n] = np.mean(window)

        ethaneMethane[row, :sampleCount] = ethane / methaneAverage
        aceMethane[row, :sampleCount] = ace / methaneAverage

        # Seconds -> days within the year, plus a per-year offset.
        # NOTE(review): the offset multiplies the number of elapsed years
        # by the length of the *current* year instead of summing the
        # lengths of the preceding years, so rows are not on one
        # continuous day axis when leap years are involved (assuming
        # isleapyear returns 1/True for leap years, late 2016 ends up
        # larger than early 2017). Kept unchanged to preserve existing
        # output -- confirm the intended behaviour.
        datesFinal[row, :sampleCount] = (
            (nmhcDate / 60 / 60 / 24) + (year - startYear) * daysInYear)

    return ethaneMethane, aceMethane, datesFinal