def read_observations(years, usaf='081810', wban='99999',
                      station_latitudes=(41.283, 41.293)):
    """Load on-the-hour ISH reports for one station into a DataFrame.

    For every year in ``years`` the gzip file
    ``../data/observations/{usaf}-{wban}-{year}.gz`` is decompressed and fed
    to a single ``ish_parser`` instance, so the reports of all years are
    accumulated before the table is built.

    Args:
        years: Iterable of years; each one is substituted into the file name.
        usaf: USAF station identifier used in the file name.
        wban: WBAN station identifier used in the file name.
        station_latitudes: Latitudes a report must match exactly
            (``r.latitude in station_latitudes``) to be kept. The default
            reproduces the values that used to be hard-coded here; a
            previously used alternative was ``(40.080, 40.090)``.

    Returns:
        A ``pandas.DataFrame`` indexed by ``timestamp`` with the columns
        ``AT``, ``precipitation``, ``humidity``, ``pressure``,
        ``wind_speed`` and ``wind_direction``.
    """
    parser = ish_parser()
    for year in years:
        path = "../data/observations/{usaf}-{wban}-{year}.gz".format(
            year=year, usaf=usaf, wban=wban)
        with gzip.open(path) as gz:
            parser.loads(bytes.decode(gz.read()))
    reports = parser.get_reports()

    def to_record(r):
        # Only the first precipitation entry is used; reports without any
        # precipitation entry are recorded as 0.
        if r.precipitation:
            precipitation = r.precipitation[0]['depth'].get_numeric()
        else:
            precipitation = 0
        return (r.datetime,
                r.air_temperature.get_numeric(),
                precipitation,
                r.humidity.get_numeric(),
                r.sea_level_pressure.get_numeric(),
                r.wind_speed.get_numeric(),
                r.wind_direction.get_numeric())

    # Keep only reports from the requested latitudes that fall exactly on
    # the hour (minute == 0).
    records = (to_record(r) for r in reports
               if r.latitude in station_latitudes and r.datetime.minute == 0)

    observations = pd.DataFrame.from_records(
        records,
        columns=[
            'timestamp', 'AT', 'precipitation', 'humidity', 'pressure',
            'wind_speed', 'wind_direction'
        ],
        index='timestamp')
    return observations
def test_other_airport(self):
    """Parse the AUS_FILE fixture and check report and observation counts."""
    parser = ish_parser()
    with open(self.AUS_FILE) as handle:
        raw_text = handle.read()
    parser.loads(raw_text)

    # Total number of parsed reports.
    report_count = len(parser.get_reports())
    self.assertEqual(report_count, 4237)

    # An arbitrary entry must be an ish_report instance (exact type).
    sample_type = type(parser.get_reports()[10])
    self.assertEqual(sample_type, ish_report)

    # Number of entries returned by get_observations().
    observation_count = len(parser.get_observations())
    self.assertEqual(observation_count, 3333)
def test_random_other_file(self):
    """Parse the OTHER_RANDOM fixture and check the number of reports."""
    with open(self.OTHER_RANDOM) as source:
        file_text = source.read()

    loader = ish_parser()
    loader.loads(file_text)

    n_reports = len(loader.get_reports())
    self.assertEqual(n_reports, 2816)

    # Spot-check that entries are ish_report objects (exact type).
    eleventh = loader.get_reports()[10]
    self.assertEqual(type(eleventh), ish_report)
def test_from_file(self):
    """Parse the ORD_FILE fixture and check report and observation counts."""
    weather = ish_parser()
    with open(self.ORD_FILE) as ord_handle:
        ord_text = ord_handle.read()
    weather.loads(ord_text)

    parsed_total = len(weather.get_reports())
    self.assertEqual(parsed_total, 4262)

    # Entry 10 must be exactly an ish_report.
    entry_type = type(weather.get_reports()[10])
    self.assertEqual(entry_type, ish_report)

    observed_total = len(weather.get_observations())
    self.assertEqual(observed_total, 3135)
def test_another_weird_file(self):
    """Parse the OTHER_BUG fixture; check the report count and one reading."""
    with open(self.OTHER_BUG) as bug_file:
        bug_text = bug_file.read()

    reader = ish_parser()
    reader.loads(bug_text)

    total = len(reader.get_reports())
    self.assertEqual(total, 8580)

    # Report 22 is expected to carry an air temperature of 64.4 F.
    sample = reader.get_reports()[22]
    fahrenheit = sample.air_temperature.get_fahrenheit()
    self.assertEqual(fahrenheit, 64.4)
def test_random_old_file(self):
    """Parse the OLDRANDOMFILE fixture and check report/observation counts."""
    old_parser = ish_parser()
    with open(self.OLDRANDOMFILE) as old_file:
        old_text = old_file.read()
    old_parser.loads(old_text)

    how_many_reports = len(old_parser.get_reports())
    self.assertEqual(how_many_reports, 8760)

    # Spot-check the exact type of one parsed entry.
    picked = old_parser.get_reports()[10]
    self.assertEqual(type(picked), ish_report)

    how_many_observations = len(old_parser.get_observations())
    self.assertEqual(how_many_observations, 7466)
def test_file_throwing_problems(self):
    """Parse the RECURSIONBUG fixture, which once caused infinite recursion."""
    with open(self.RECURSIONBUG) as stream:
        text = stream.read()

    station_parser = ish_parser()
    station_parser.loads(text)

    count = len(station_parser.get_reports())
    self.assertEqual(count, 4410)

    # Entry 10 must be exactly an ish_report.
    kind = type(station_parser.get_reports()[10])
    self.assertEqual(kind, ish_report)

    # Report 22 has no air temperature; get_fahrenheit() is expected to
    # return the string 'MISSING' for it.
    probe = station_parser.get_reports()[22]
    reading = probe.air_temperature.get_fahrenheit()
    self.assertEqual(reading, 'MISSING')
def readNOAA_ISH(USAF, WBAN, year):
    """This function reads data from NOAA ISH data files for U.S. cities used
    for CONTAM modeling in the EPA indoor air quality project.

    Input:
        USAF - USAF station identifier
        WBAN - WBAN station identifier
        year - Desired year, e.g., 2010

    Output:
        None. The time, temperature, pressure, wind-speed and wind-direction
        arrays are built from the parsed observations but are not returned.
        NOTE(review): this looks unfinished - confirm what callers expect.

        Written by Von P. Walden, Washington State University
        12 Nov 2017
    """
    from ish_parser import ish_parser
    import pandas as pd
    import numpy as np

    # Construct filename of the desired data and read entire file.
    # NOTE(review): the pieces are concatenated without a path separator
    # after '3505v2' and without a separator between WBAN and year - confirm
    # that this matches the actual file layout.
    fn = '/Volumes/vonw/data/iaq/NCDC/ish/3505v2' + str(USAF) + '-' + str(WBAN) + str(year) + '.op'
    # The context manager closes the file even if reading fails.
    with open(fn) as f:
        content = f.read()

    # Read the observations from the desired file.
    wf = ish_parser()
    wf.loads(content)
    obs = wf.get_observations()

    # Create a datetime index.
    #
    time = np.array([ob.datetime for ob in obs])
    #
    # ............................... WEATHER DATA ............................
    #
    T = np.array([ob.air_temperature.get_numeric() for ob in obs])
    P = np.array([ob.sea_level_pressure.get_numeric() for ob in obs])
    wspd = np.array([ob.wind_speed.get_numeric() for ob in obs])
    wdir = np.array([ob.wind_direction.get_numeric() for ob in obs])
def test_issues_with_missing_at1(self):
    """Parse the AT1_ERROR fixture and check the number of reports."""
    at1_parser = ish_parser()
    with open(self.AT1_ERROR) as at1_file:
        at1_text = at1_file.read()
    at1_parser.loads(at1_text)

    parsed = at1_parser.get_reports()
    self.assertEqual(len(parsed), 154)
def readNOAA_ISH(USAF, WBAN, year):
    """This function reads data from NOAA ISH data files for U.S. cities used
    for CONTAM modeling in the EPA indoor air quality project.

    Input:
        USAF - USAF station identifier (as a string)
        WBAN - WBAN station identifier (as a string)
        year - Desired year, e.g., 2010 (as an integer)

    Output:
        pandas DataFrame, resampled to hourly means, with the columns
            Ta        - air temperature (K)
            Pb        - station pressure from pressureCorrection (Pa)
            Ws        - wind speed
            Wd        - wind direction
            Hr        - humidity (mixing) ratio (g/kg)
            rho       - air density, Pb / (Rd * Ta)
            elevation - station elevation

        Written by Von P. Walden, Washington State University
        12 Nov 2017
    """
    """
    isd-history-IAQ.csv

    "CITY","USAF","WBAN","STATION NAME","CTRY","STATE","ICAO","LAT","LON","ELEV(M)","BEGIN","END"
    "Chicago","725300","94846","CHICAGO O'HARE INTERNATIONAL AIRPORT","US","IL","KORD","+41.995","-087.934","+0201.8","19461001","20171107"
    "Cincinnati","724210","93814","CINCINNATI/NORTHERN KENTUCKY INTL AP","US","KY","KCVG","+39.044","-084.672","+0269.1","19730101","20171107"
    "Nashville","723270","13897","NASHVILLE INTERNATIONAL AIRPORT","US","TN","KBNA","+36.119","-086.689","+0182.9","19510101","20171108"
    "Birmingham","722280","13876","BIRMINGHAM INTERNATIONAL AIRPORT","US","AL","KBHM","+33.566","-086.745","+0187.5","19420801","20171107"
    "NewYork","725030","14732","LA GUARDIA AIRPORT","US","NY","KLGA","+40.779","-073.880","+0003.4","19730101","20171107"
    "Buffalo","725280","14733","BUFFALO NIAGARA INTERNATIONAL AP","US","NY","KBUF","+42.941","-078.736","+0218.2","19420201","20171107"
    "Phoenix","722780","23183","PHOENIX SKY HARBOR INTL AIRPORT","US","AZ","KPHX","+33.428","-112.004","+0337.4","19730101","20171107"
    "Denver","725650","03017","DENVER INTERNATIONAL AIRPORT","US","CO","KDEN","+39.833","-104.658","+1650.2","19940718","20171107"
    "Boston","725090","14739","GEN E L LOGAN INTERNATIONAL AIRPORT","US","MA","KBOS","+42.361","-071.010","+0003.7","19431121","20171107"
    "Worcester","725100","94746","WORCESTER REGIONAL AIRPORT","US","MA","KORH","+42.271","-071.873","+0304.8","20100801","20171107"
    "LosAngeles","722950","23174","LOS ANGELES INTERNATIONAL AIRPORT","US","CA","KLAX","+33.938","-118.389","+0029.6","19440101","20171107"
    "Seattle","727930","24233","SEATTLE-TACOMA INTERNATIONAL AIRPORT","US","WA","KSEA","+47.444","-122.314","+0112.8","19480101","20171107"
    "Miami","722020","12839","MIAMI INTERNATIONAL AIRPORT","US","FL","KMIA","+25.791","-080.316","+0008.8","19730101","20171107"
    "WashingtonDC","724030","93738","WASHINGTON DULLES INTERNATIONAL AP","US","VA","KIAD","+38.935","-077.447","+0088.4","19730101","20171107"
    "Atlanta","722190","13874","HARTSFIELD-JACKSON ATLANTA INTL AP","US","GA","KATL","+33.630","-084.442","+0307.9","19730101","20171108"
    "Minneapolis","726580","14922","MINNEAPOLIS-ST PAUL INTERNATIONAL AP","US","MN","KMSP","+44.883","-093.229","+0265.8","19450101","20171107"
    "StLouis","724340","13994","LAMBERT-ST LOUIS INTERNATIONAL AP","US","MO","KSTL","+38.753","-090.374","+0161.9","19730101","20171107"
    "Dallas","722590","03927","DALLAS/FT WORTH INTERNATIONAL AP","US","TX","KDFW","+32.898","-097.019","+0170.7","19730101","20171107"
    "CorpusChristi","722510","12924","CORPUS CHRISTI INTERNATIONAL AIRPORT","US","TX","KCRP","+27.774","-097.512","+0013.4","19460801","20171107"
    """

    def pressureCorrection(Ps, Hstn, Tstn):
        """Calculate the station pressure from the sea-level pressure (Ps),
        the station elevation (Hstn) and the station temperature (Tstn).
        The result has the same units as Ps. The correction comes from
        http://www.weather.gov/media/epz/wxcalc/stationPressure.pdf. This
        correction was quickly checked against the hypsometric equation and
        was shown to be adequate; see pressureCorrectionTest.py.

        Inputs:
            Ps   - sea-level pressure (this function is called with Pa)
            Hstn - elevation (height) of weather station (meters)
            Tstn - temperature measured at the weather station (K)

        Output:
            Atmospheric pressure at the weather station, in the units of Ps

            Written by Von P. Walden, Washington State University
            19 November 2017
        """
        # Temperature difference (K) across the station height, using an
        # approximate lapse rate of 0.0065 K m-1.
        dT = 0.0065 * Hstn
        return Ps * ((Tstn - dT) / Tstn)**5.2561

    # NOAA ISH parser comes from:
    #     https://github.com/haydenth/ish_parser
    from ish_parser import ish_parser
    import pandas as pd
    import numpy as np

    # Construct filename of the desired data and read entire file.
    # NOTE(review): the pieces are concatenated without a path separator
    # after '3505v2' and without a separator between WBAN and year - confirm
    # that this matches the actual file layout.
    fn = '/Volumes/vonw/data/iaq/NCDC/ish/3505v2' + USAF + '-' + WBAN + str(
        year) + '.op'
    # The context manager closes the file even if reading fails.
    with open(fn) as f:
        content = f.read()

    # Read the observations from the desired file.
    wf = ish_parser()
    wf.loads(content)
    obs = wf.get_observations()

    # Create a datetime index.
    #
    time = np.array([ob.datetime for ob in obs])
    #
    # ............................... WEATHER DATA ............................
    #
    Hstn = np.array([ob.elevation for ob in obs])  # meters
    T = np.array([ob.air_temperature.get_numeric() for ob in obs])  # deg C
    Ps = np.array([ob.sea_level_pressure.get_numeric()
                   for ob in obs]) * 100.  # Pa
    Pb = pressureCorrection(Ps, Hstn, T + 273.15)  # Pa
    wspd = np.array([ob.wind_speed.get_numeric() for ob in obs])  # m s-1
    wdir = np.array([ob.wind_direction.get_numeric()
                     for ob in obs])  # degrees

    # Conversion from relative humidity to mixing ratio
    # ....http://www.vaisala.com/Vaisala%20Documents/Application%20notes/Humidity_Conversion_Formulas_B210973EN-F.pdf
    A = 6.116441
    m = 7.591386
    Tn = 240.7263
    es = A * 10**(m * (T) / (T + Tn))
    # NOTE(review): es presumably comes out in hPa (Vaisala constants) while
    # Pb is in Pa; the resulting factor of 100 would offset a humidity value
    # reported in percent - confirm the units before changing either term.
    ws = 0.622 * (es / Pb)
    w = np.array([
        ob.humidity.get_numeric() for ob in obs
    ]) * ws * 1000.  # Factor of 1000 converts from kg/kg to g/kg.

    # Calculation of air density
    Rd = 287.  # Gas constant for dry air; J kg-1 K-1
    rho = Pb / (Rd * (T + 273.15))

    # Create a pandas DataFrame that contains the weather data.
    wth = pd.DataFrame(
        {
            'Ta': T + 273.15,
            'Pb': Pb,
            'Ws': wspd,
            'Wd': wdir,
            'Hr': w,
            'rho': rho,
            'elevation': Hstn
        },
        index=time)

    # Resample the dataframe to an hourly time step.
    wth = wth.resample('H').mean()

    return wth
def get_data(year, STATION_ID_LIST, keys, destinationPath):
    """Download one year of NOAA ISH files over FTP and extract report fields.

    For every station id the file ``pub/data/noaa/{year}/{id}-{year}.gz`` is
    fetched from ``ftp.ncdc.noaa.gov`` into memory, decompressed and parsed
    with ``ish_parser``. For each parsed report the attributes named in
    ``keys`` are collected into per-station lists.

    Args:
        year: Year used in the remote directory and file name.
        STATION_ID_LIST: Sequence of station ids (the ``{ID}`` part of the
            remote file name).
        keys: Names of report attributes to extract, e.g.
            ``'air_temperature'`` or ``'humidity'``.
        destinationPath: Unused by this function; kept so existing callers
            keep working.

    Returns:
        ``{station_id: {key: [value, ...]}}``. A station whose file is
        missing on the server (FTP reply starting with ``550``) or that
        yields no reports maps every key to an empty list.

    Raises:
        ftplib.error_perm: For permanent FTP errors other than ``550``.
    """
    import ftplib
    import io
    import gzip
    import ish_parser  # from: https://github.com/haydenth/ish_parser

    ftp_host = "ftp.ncdc.noaa.gov"
    parser = ish_parser.ish_parser()

    # Initialize dict for this year.
    data_year = {}

    with ftplib.FTP(host=ftp_host, timeout=600) as ftpconn:
        ftpconn.login()

        print('Downloading year ', year)

        station_total = len(STATION_ID_LIST)
        # Report progress roughly every 10% of the stations (at least at
        # every station when there are fewer than ten).
        progress_step = max(1, int(station_total / 10))

        # The single parser accumulates the reports of every file loaded so
        # far; this offset marks where the current station's reports begin.
        LastReportLength = 0

        for counting, station_id in enumerate(STATION_ID_LIST, start=1):
            ftp_file = "pub/data/noaa/{YEAR}/{ID}-{YEAR}.gz".format(
                ID=station_id, YEAR=year)

            # Show download progress as a percentage of completion.
            percentage_done = counting / station_total * 100
            if counting % progress_step == 0:
                print(round(percentage_done, 1), "% completed....")

            # One list per requested key, long enough to hold a year of
            # values. Created up front so that stations without data still
            # appear in the result.
            data_year[station_id] = {key: [] for key in keys}

            # Read the whole file and save it to a BytesIO (stream).
            response = io.BytesIO()
            try:
                ftpconn.retrbinary('RETR ' + ftp_file, response.write)
            except ftplib.error_perm as err:
                if not str(err).startswith('550 '):
                    raise
                # File not available on the server: report it and move on
                # instead of trying to parse an empty stream.
                print('ERROR:', err)
                continue

            # Decompress the downloaded bytes.
            response.seek(0)  # jump back to the beginning of the stream
            with gzip.open(response, mode='rb') as gzstream:
                content = bytes.decode(gzstream.read())

            # ish_parser turns the raw NOAA text into report objects.
            parser.loads(content)

            # Select the reports corresponding to the last station loaded.
            reports = parser.get_reports()[LastReportLength:]
            LastReportLength += len(reports)

            # Reminder (please keep): dir(reports[0]) lists every attribute
            # name that may be used in `keys`.

            # Append the features to the appropriate lists. Slowest part of
            # the function: roughly len(reports) * len(keys) iterations.
            station_data = data_year[station_id]
            for report in reports:
                for key in keys:
                    # getattr(report, key) is report.<key>, e.g.
                    # report.air_temperature.
                    value = getattr(report, key)
                    if key == 'humidity':
                        value_inserted = value.humidity
                    else:
                        # Use the wrapped observation value when the
                        # attribute has one, otherwise the attribute itself.
                        value_inserted = getattr(value, '_obs_value', value)
                    station_data[key].append(value_inserted)

    return data_year