Beispiel #1
0
def read_observations(years, usaf='081810', wban='99999'):
    """Load on-the-hour ISH reports for one station into a DataFrame.

    For each entry of ``years`` the gzip file
    ``../data/observations/<usaf>-<wban>-<year>.gz`` is read, decoded and
    handed to a single ``ish_parser`` instance.  A report is kept only when
    its latitude equals one of the hard-coded station latitudes and its
    timestamp has ``minute == 0``.

    Returns:
        A DataFrame indexed by ``timestamp`` with the columns ``AT``,
        ``precipitation``, ``humidity``, ``pressure``, ``wind_speed`` and
        ``wind_direction``.  ``precipitation`` is 0 for reports that carry
        no precipitation entry.
    """
    parser = ish_parser()
    path_template = "../data/observations/{usaf}-{wban}-{year}.gz"

    for year in years:
        archive = path_template.format(usaf=usaf, wban=wban, year=year)
        with gzip.open(archive) as gz:
            parser.loads(gz.read().decode())

    reports = parser.get_reports()

    station_latitudes = [41.283, 41.293]
    # station_latitudes = [40.080, 40.090]

    def hourly_rows():
        # Yield one tuple per retained report, in the column order below.
        for report in reports:
            if report.latitude not in station_latitudes:
                continue
            if report.datetime.minute != 0:
                continue
            timestamp = report.datetime
            temperature = report.air_temperature.get_numeric()
            if report.precipitation:
                rain_depth = report.precipitation[0]['depth'].get_numeric()
            else:
                rain_depth = 0
            yield (timestamp, temperature, rain_depth,
                   report.humidity.get_numeric(),
                   report.sea_level_pressure.get_numeric(),
                   report.wind_speed.get_numeric(),
                   report.wind_direction.get_numeric())

    column_names = [
        'timestamp', 'AT', 'precipitation', 'humidity', 'pressure',
        'wind_speed', 'wind_direction'
    ]
    observations = pd.DataFrame.from_records(
        hourly_rows(), columns=column_names, index='timestamp')

    return observations
 def test_other_airport(self):
     """Parse the file at ``self.AUS_FILE`` and check the parsed totals.

     Expects 4237 reports, an ``ish_report`` at index 10 and 3333
     observations.
     """
     with open(self.AUS_FILE) as handle:
         raw_text = handle.read()
     parser = ish_parser()
     parser.loads(raw_text)
     report_total = len(parser.get_reports())
     self.assertEqual(report_total, 4237)
     sample_report = parser.get_reports()[10]
     self.assertEqual(type(sample_report), ish_report)
     observation_total = len(parser.get_observations())
     self.assertEqual(observation_total, 3333)
Beispiel #3
0
 def test_other_airport(self):
   """Parse the file at ``self.AUS_FILE`` and check the parsed totals.

   Expects 4237 reports, an ``ish_report`` at index 10 and 3333
   observations.
   """
   with open(self.AUS_FILE) as handle:
     raw_text = handle.read()
   parser = ish_parser()
   parser.loads(raw_text)
   report_total = len(parser.get_reports())
   self.assertEqual(report_total, 4237)
   sample_report = parser.get_reports()[10]
   self.assertEqual(type(sample_report), ish_report)
   observation_total = len(parser.get_observations())
   self.assertEqual(observation_total, 3333)
 def test_random_other_file(self):
     """Parse the file at ``self.OTHER_RANDOM`` without errors.

     Expects 2816 reports and an ``ish_report`` at index 10.
     """
     with open(self.OTHER_RANDOM) as handle:
         raw_text = handle.read()
     parser = ish_parser()
     parser.loads(raw_text)
     report_total = len(parser.get_reports())
     self.assertEqual(report_total, 2816)
     sample_report = parser.get_reports()[10]
     self.assertEqual(type(sample_report), ish_report)
Beispiel #5
0
 def test_random_other_file(self):
   """Parse the file at ``self.OTHER_RANDOM`` without errors.

   Expects 2816 reports and an ``ish_report`` at index 10.
   """
   with open(self.OTHER_RANDOM) as handle:
     raw_text = handle.read()
   parser = ish_parser()
   parser.loads(raw_text)
   report_total = len(parser.get_reports())
   self.assertEqual(report_total, 2816)
   sample_report = parser.get_reports()[10]
   self.assertEqual(type(sample_report), ish_report)
Beispiel #6
0
 def test_from_file(self):
   """Load weather data from the file at ``self.ORD_FILE``.

   Expects 4262 reports, an ``ish_report`` at index 10 and 3135
   observations.
   """
   with open(self.ORD_FILE) as handle:
     raw_text = handle.read()
   parser = ish_parser()
   parser.loads(raw_text)
   report_total = len(parser.get_reports())
   self.assertEqual(report_total, 4262)
   sample_report = parser.get_reports()[10]
   self.assertEqual(type(sample_report), ish_report)
   observation_total = len(parser.get_observations())
   self.assertEqual(observation_total, 3135)
 def test_from_file(self):
     """Load weather data from the file at ``self.ORD_FILE``.

     Expects 4262 reports, an ``ish_report`` at index 10 and 3135
     observations.
     """
     with open(self.ORD_FILE) as handle:
         raw_text = handle.read()
     parser = ish_parser()
     parser.loads(raw_text)
     report_total = len(parser.get_reports())
     self.assertEqual(report_total, 4262)
     sample_report = parser.get_reports()[10]
     self.assertEqual(type(sample_report), ish_report)
     observation_total = len(parser.get_observations())
     self.assertEqual(observation_total, 3135)
Beispiel #8
0
 def test_another_weird_file(self):
   """Parse the file at ``self.OTHER_BUG`` and spot-check one temperature.

   Expects 8580 reports, and ``get_fahrenheit()`` on the air temperature
   of the report at index 22 to return 64.4.
   """
   with open(self.OTHER_BUG) as handle:
     raw_text = handle.read()
   parser = ish_parser()
   parser.loads(raw_text)
   report_total = len(parser.get_reports())
   self.assertEqual(report_total, 8580)
   sample_report = parser.get_reports()[22]
   fahrenheit = sample_report.air_temperature.get_fahrenheit()
   self.assertEqual(fahrenheit, 64.4)
Beispiel #9
0
 def test_random_old_file(self):
   """Parse the old file at ``self.OLDRANDOMFILE`` without errors.

   Expects 8760 reports, an ``ish_report`` at index 10 and 7466
   observations.
   """
   with open(self.OLDRANDOMFILE) as handle:
     raw_text = handle.read()
   parser = ish_parser()
   parser.loads(raw_text)
   report_total = len(parser.get_reports())
   self.assertEqual(report_total, 8760)
   sample_report = parser.get_reports()[10]
   self.assertEqual(type(sample_report), ish_report)
   observation_total = len(parser.get_observations())
   self.assertEqual(observation_total, 7466)
Beispiel #10
0
 def test_another_weird_file(self):
     """Parse the file at ``self.OTHER_BUG`` and spot-check one temperature.

     Expects 8580 reports, and ``get_fahrenheit()`` on the air temperature
     of the report at index 22 to return 64.4.
     """
     with open(self.OTHER_BUG) as handle:
         raw_text = handle.read()
     parser = ish_parser()
     parser.loads(raw_text)
     report_total = len(parser.get_reports())
     self.assertEqual(report_total, 8580)
     sample_report = parser.get_reports()[22]
     fahrenheit = sample_report.air_temperature.get_fahrenheit()
     self.assertEqual(fahrenheit, 64.4)
Beispiel #11
0
 def test_random_old_file(self):
     """Parse the old file at ``self.OLDRANDOMFILE`` without errors.

     Expects 8760 reports, an ``ish_report`` at index 10 and 7466
     observations.
     """
     with open(self.OLDRANDOMFILE) as handle:
         raw_text = handle.read()
     parser = ish_parser()
     parser.loads(raw_text)
     report_total = len(parser.get_reports())
     self.assertEqual(report_total, 8760)
     sample_report = parser.get_reports()[10]
     self.assertEqual(type(sample_report), ish_report)
     observation_total = len(parser.get_observations())
     self.assertEqual(observation_total, 7466)
Beispiel #12
0
 def test_file_throwing_problems(self):
   """Parse the file that used to get stuck in infinite recursion.

   Loads ``self.RECURSIONBUG`` and expects 4410 reports, an ``ish_report``
   at index 10, and ``get_fahrenheit()`` on the air temperature of the
   report at index 22 to return the string 'MISSING'.
   """
   with open(self.RECURSIONBUG) as handle:
     raw_text = handle.read()
   parser = ish_parser()
   parser.loads(raw_text)
   report_total = len(parser.get_reports())
   self.assertEqual(report_total, 4410)
   typed_report = parser.get_reports()[10]
   self.assertEqual(type(typed_report), ish_report)

   sample_report = parser.get_reports()[22]
   fahrenheit = sample_report.air_temperature.get_fahrenheit()
   self.assertEqual(fahrenheit, 'MISSING')
Beispiel #13
0
    def test_file_throwing_problems(self):
        """Parse the file that used to get stuck in infinite recursion.

        Loads ``self.RECURSIONBUG`` and expects 4410 reports, an
        ``ish_report`` at index 10, and ``get_fahrenheit()`` on the air
        temperature of the report at index 22 to return the string
        'MISSING'.
        """
        with open(self.RECURSIONBUG) as handle:
            raw_text = handle.read()
        parser = ish_parser()
        parser.loads(raw_text)
        report_total = len(parser.get_reports())
        self.assertEqual(report_total, 4410)
        typed_report = parser.get_reports()[10]
        self.assertEqual(type(typed_report), ish_report)

        sample_report = parser.get_reports()[22]
        fahrenheit = sample_report.air_temperature.get_fahrenheit()
        self.assertEqual(fahrenheit, 'MISSING')
Beispiel #14
0
def readNOAA_ISH(USAF, WBAN, year):
    """Read one year of NOAA ISH data for a station into NumPy arrays.

    This function reads data from NOAA ISH data files for U.S. cities used
    for CONTAM modeling in the EPA indoor air quality project.

    Input:
        USAF - USAF station identifier
        WBAN - WBAN station identifier
        year - Desired year, e.g., 2010

    NOTE(review): as visible here the function builds the arrays below and
    then ends without returning them; the remainder of the original
    definition appears to be missing.

    Written by Von P. Walden, Washington State University
               12 Nov 2017

    """

    from ish_parser import ish_parser
    import pandas as pd
    import numpy as np

    # Construct filename of the desired data and read entire file.
    # NOTE(review): no separator is inserted after '3505v2' or between WBAN
    # and year -- confirm this matches the on-disk file names.
    fn = '/Volumes/vonw/data/iaq/NCDC/ish/3505v2' + str(USAF) + '-' + str(WBAN) + str(year) + '.op'
    # The context manager closes the file even if read() raises.
    with open(fn) as f:
        content = f.read()

    # Read the observations from the desired file.
    wf = ish_parser()
    wf.loads(content)
    obs = wf.get_observations()

    # Create a datetime index.
    #
    time = np.array([ob.datetime for ob in obs])

    # ............................... WEATHER DATA ............................
    #
    T = np.array([ob.air_temperature.get_numeric() for ob in obs])
    P = np.array([ob.sea_level_pressure.get_numeric() for ob in obs])
    wspd = np.array([ob.wind_speed.get_numeric() for ob in obs])
    wdir = np.array([ob.wind_direction.get_numeric() for ob in obs])
Beispiel #15
0
 def test_issues_with_missing_at1(self):
   """Parse the file at ``self.AT1_ERROR`` and expect 154 reports."""
   with open(self.AT1_ERROR) as handle:
     raw_text = handle.read()
   parser = ish_parser()
   parser.loads(raw_text)
   report_total = len(parser.get_reports())
   self.assertEqual(report_total, 154)
Beispiel #16
0
def readNOAA_ISH(USAF, WBAN, year):
    """Read one year of NOAA ISH data and return hourly weather variables.

    This function reads data from NOAA ISH data files for U.S. cities used
    for CONTAM modeling in the EPA indoor air quality project.

    Input:
        USAF - USAF station identifier  (as a string)
        WBAN - WBAN station identifier  (as a string)
        year - Desired year, e.g., 2010 (as an integer)

    Output:
        pandas DataFrame indexed by observation time and resampled to hourly
        means, with columns:
            Ta        - air temperature (K)
            Pb        - station pressure (Pa)
            Ws        - wind speed (m s-1)
            Wd        - wind direction (degrees)
            Hr        - humidity mixing ratio (g/kg)
            rho       - air density, Pb / (Rd * Ta)
            elevation - station elevation (meters)

    Written by Von P. Walden, Washington State University
               12 Nov 2017

    """
    """
    isd-history-IAQ.csv
        "CITY","USAF","WBAN","STATION NAME","CTRY","STATE","ICAO","LAT","LON","ELEV(M)","BEGIN","END"
        "Chicago","725300","94846","CHICAGO O'HARE INTERNATIONAL AIRPORT","US","IL","KORD","+41.995","-087.934","+0201.8","19461001","20171107"
        "Cincinnati","724210","93814","CINCINNATI/NORTHERN KENTUCKY INTL AP","US","KY","KCVG","+39.044","-084.672","+0269.1","19730101","20171107"
        "Nashville","723270","13897","NASHVILLE INTERNATIONAL AIRPORT","US","TN","KBNA","+36.119","-086.689","+0182.9","19510101","20171108"
        "Birmingham","722280","13876","BIRMINGHAM INTERNATIONAL AIRPORT","US","AL","KBHM","+33.566","-086.745","+0187.5","19420801","20171107"
        "NewYork","725030","14732","LA GUARDIA AIRPORT","US","NY","KLGA","+40.779","-073.880","+0003.4","19730101","20171107"
        "Buffalo","725280","14733","BUFFALO NIAGARA INTERNATIONAL AP","US","NY","KBUF","+42.941","-078.736","+0218.2","19420201","20171107"
        "Phoenix","722780","23183","PHOENIX SKY HARBOR INTL AIRPORT","US","AZ","KPHX","+33.428","-112.004","+0337.4","19730101","20171107"
        "Denver","725650","03017","DENVER INTERNATIONAL AIRPORT","US","CO","KDEN","+39.833","-104.658","+1650.2","19940718","20171107"
        "Boston","725090","14739","GEN E L LOGAN INTERNATIONAL AIRPORT","US","MA","KBOS","+42.361","-071.010","+0003.7","19431121","20171107"
        "Worcester","725100","94746","WORCESTER REGIONAL AIRPORT","US","MA","KORH","+42.271","-071.873","+0304.8","20100801","20171107"
        "LosAngeles","722950","23174","LOS ANGELES INTERNATIONAL AIRPORT","US","CA","KLAX","+33.938","-118.389","+0029.6","19440101","20171107"
        "Seattle","727930","24233","SEATTLE-TACOMA INTERNATIONAL AIRPORT","US","WA","KSEA","+47.444","-122.314","+0112.8","19480101","20171107"
        "Miami","722020","12839","MIAMI INTERNATIONAL AIRPORT","US","FL","KMIA","+25.791","-080.316","+0008.8","19730101","20171107"
        "WashingtonDC","724030","93738","WASHINGTON DULLES INTERNATIONAL AP","US","VA","KIAD","+38.935","-077.447","+0088.4","19730101","20171107"
        "Atlanta","722190","13874","HARTSFIELD-JACKSON ATLANTA INTL AP","US","GA","KATL","+33.630","-084.442","+0307.9","19730101","20171108"
        "Minneapolis","726580","14922","MINNEAPOLIS-ST PAUL INTERNATIONAL AP","US","MN","KMSP","+44.883","-093.229","+0265.8","19450101","20171107"
        "StLouis","724340","13994","LAMBERT-ST LOUIS INTERNATIONAL AP","US","MO","KSTL","+38.753","-090.374","+0161.9","19730101","20171107"
        "Dallas","722590","03927","DALLAS/FT WORTH INTERNATIONAL AP","US","TX","KDFW","+32.898","-097.019","+0170.7","19730101","20171107"
        "CorpusChristi","722510","12924","CORPUS CHRISTI  INTERNATIONAL AIRPORT","US","TX","KCRP","+27.774","-097.512","+0013.4","19460801","20171107"
    """
    def pressureCorrection(Ps, Hstn, Tstn):
        """Calculate the station pressure from the sea-level pressure (Ps),
        the station elevation (Hstn) and the station temperature (Tstn).
        The result is in the same units as Ps (the caller passes Pa). The
        correction comes from
        http://www.weather.gov/media/epz/wxcalc/stationPressure.pdf. This
        correction was quickly checked against the hypsometric equation and was
        shown to be adequate; see pressureCorrectionTest.py.

        Inputs:
            Ps   - sea-level pressure (the caller passes Pa)
            Hstn - elevation (height) of weather station (meters)
            Tstn - temperature measured at the weather station (K)

        Output:
            Atmospheric pressure at the weather station

        Written by Von P. Walden, Washington State University
                    19 November 2017
        """
        # 0.0065 K m-1 is the approximate lapse rate; multiplied by the
        # station height it gives the temperature difference over that height.
        Lrate = 0.0065 * Hstn
        return Ps * ((Tstn - Lrate) / Tstn)**5.2561

    # NOAA ISH parser comes from:
    #    https://github.com/haydenth/ish_parser
    from ish_parser import ish_parser
    import pandas as pd
    import numpy as np

    # Construct filename of the desired data and read entire file.
    # NOTE(review): no separator is inserted after '3505v2' or between WBAN
    # and year -- confirm this matches the on-disk file names.
    fn = '/Volumes/vonw/data/iaq/NCDC/ish/3505v2' + USAF + '-' + WBAN + str(
        year) + '.op'
    # The context manager closes the file even if read() raises.
    with open(fn) as f:
        content = f.read()

    # Read the observations from the desired file.
    wf = ish_parser()
    wf.loads(content)
    obs = wf.get_observations()

    # Create a datetime index.
    #
    time = np.array([ob.datetime for ob in obs])

    # ............................... WEATHER DATA ............................
    #
    Hstn = np.array([ob.elevation for ob in obs])  # meters
    T = np.array([ob.air_temperature.get_numeric() for ob in obs])  # deg C
    Ps = np.array([ob.sea_level_pressure.get_numeric()
                   for ob in obs]) * 100.  # Pa
    Pb = pressureCorrection(Ps, Hstn, T + 273.15)  # Pa
    wspd = np.array([ob.wind_speed.get_numeric() for ob in obs])  # m s-1
    wdir = np.array([ob.wind_direction.get_numeric() for ob in obs])  # degrees
    # Conversion from relative humidity to mixing ratio
    #    ....http://www.vaisala.com/Vaisala%20Documents/Application%20notes/Humidity_Conversion_Formulas_B210973EN-F.pdf
    # NOTE(review): es from this formula is presumably in hPa while Pb is in
    # Pa, and humidity is presumably in percent, so the two factors of 100
    # would cancel -- confirm before changing either unit.
    A = 6.116441
    m = 7.591386
    Tn = 240.7263
    es = A * 10**(m * (T) / (T + Tn))
    ws = 0.622 * (es / Pb)
    w = np.array([
        ob.humidity.get_numeric() for ob in obs
    ]) * ws * 1000.  # Factor of 1000 converts from kg/kg to g/kg.
    # Calculation of air density
    Rd = 287.  # Gas constant for dry air; J kg-1 K-1
    rho = Pb / (Rd * (T + 273.15))
    # Create a pandas DataFrame that contains the weather data.
    wth = pd.DataFrame(
        {
            'Ta': T + 273.15,
            'Pb': Pb,
            'Ws': wspd,
            'Wd': wdir,
            'Hr': w,
            'rho': rho,
            'elevation': Hstn
        },
        index=time)

    # Resample the dataframe to an hourly time step.
    wth = wth.resample('H').mean()

    return wth
Beispiel #17
0
 def test_issues_with_missing_at1(self):
     """Parse the file at ``self.AT1_ERROR`` and expect 154 reports."""
     with open(self.AT1_ERROR) as handle:
         raw_text = handle.read()
     parser = ish_parser()
     parser.loads(raw_text)
     report_total = len(parser.get_reports())
     self.assertEqual(report_total, 154)
Beispiel #18
0
def get_data(year, STATION_ID_LIST, keys, destinationPath):
    """Download and parse one year of NOAA ISH data for several stations.

    For every station id the file ``pub/data/noaa/<year>/<id>-<year>.gz`` is
    fetched over anonymous FTP from ftp.ncdc.noaa.gov, decompressed in
    memory and parsed with ``ish_parser``.

    Args:
        year: Year to download; used in the remote directory and file name.
        STATION_ID_LIST: Sized sequence of station ids (the ``<id>`` part of
            the remote file name).
        keys: Names of report attributes to extract, e.g.
            ``'air_temperature'``.
        destinationPath: Not used by this function; kept for interface
            compatibility.

    Returns:
        ``{station_id: {key: [value, ...]}}`` with one value per parsed
        report.  A station whose file is missing on the server (FTP 550
        reply) or yields no reports maps every key to an empty list.

    Raises:
        ftplib.error_perm: For permanent FTP errors other than a 550 reply.
    """
    import ftplib
    import io
    import gzip
    import ish_parser  # from: https://github.com/haydenth/ish_parser

    ftp_host = "ftp.ncdc.noaa.gov"
    parser = ish_parser.ish_parser()

    with ftplib.FTP(host=ftp_host, timeout=600) as ftpconn:
        ftpconn.login()
        # One entry per station for this year.
        data_year = {}
        print('Downloading year ', year)

        station_count = len(STATION_ID_LIST)
        # Print progress roughly every 10% (every station for short lists).
        progress_step = max(1, station_count // 10)
        # The parser accumulates reports across loads(); this offset marks
        # where the current station's reports start.
        reports_seen = 0

        for counting, station_id in enumerate(STATION_ID_LIST, start=1):
            ftp_file = "pub/data/noaa/{YEAR}/{ID}-{YEAR}.gz".format(
                ID=station_id, YEAR=year)

            if counting % progress_step == 0:
                percentage_done = counting / station_count * 100
                print(round(percentage_done, 1), "% completed....")

            # Read the whole file and save it to a BytesIO (stream).
            response = io.BytesIO()
            try:
                ftpconn.retrbinary('RETR ' + ftp_file, response.write)
            except ftplib.error_perm as err:
                if not str(err).startswith('550 '):
                    raise
                # File not available for this station/year: report it and
                # move on instead of trying to parse an empty download.
                print('ERROR:', err)
                data_year[station_id] = {key: [] for key in keys}
                continue

            # Decompress the download held in memory.
            response.seek(0)  # jump back to the beginning of the stream
            with gzip.open(response, mode='rb') as gzstream:
                content = gzstream.read().decode()

            # Parse the raw NOAA text and keep only the reports added by
            # this station's file.
            parser.loads(content)
            reports = parser.get_reports()[reports_seen:]
            reports_seen += len(reports)

            # dir(reports[0]) lists every attribute that may be used in
            # `keys`.  It is deliberately not called here: it raised
            # IndexError for a station without reports.
            station_data = {key: [] for key in keys}

            # Slowest part: nested loop over reports and requested keys.
            for report in reports:
                for key in keys:
                    value = getattr(report, key)
                    if key == 'humidity':
                        value_inserted = value.humidity
                    else:
                        # Use the underlying numeric value when the
                        # attribute has one, otherwise the attribute itself.
                        value_inserted = getattr(value, '_obs_value', value)
                    station_data[key].append(value_inserted)

            data_year[station_id] = station_data
        return data_year