Ejemplo n.º 1
0
    def adjust_data(self, data_with_variables):
        """
        Convert the raw columns to plotting units and add a dewpoint column.

        :param data_with_variables: dataframe holding at least the columns
            'Temperature_isobaric' (Kelvin), 'pressure' (Pa) and
            'Relative_humidity_isobaric'. The temperature and pressure columns
            of this dataframe are overwritten in place with the converted values.
        :return: a new dataframe built from the unit-attached columns plus a
            'Dewpoint' column
        """
        # Kelvin -> degrees Celsius. The offset is 273.15 (the previous value
        # 273.015 biased every temperature by 0.135 degrees).
        data_with_variables['Temperature_isobaric'] = (
            data_with_variables['Temperature_isobaric'] - 273.15)

        # Pa -> hPa
        data_with_variables['pressure'] = data_with_variables['pressure'] / 100

        # metpy calculations only accept data that carries units, so build the
        # column -> unit mapping. self.units is paired with the columns by
        # position; zip() silently stops at the shorter of the two.
        variables_units_dict = dict(
            zip(data_with_variables.columns, self.units))

        # Attach units to the columns; the result is a dict of unit arrays
        data_with_variables = pandas_dataframe_to_unit_arrays(
            data_with_variables, variables_units_dict)

        # Ambient dewpoint from air temperature and relative humidity
        data_with_variables['Dewpoint'] = mpcalc.dewpoint_from_relative_humidity(
            data_with_variables['Temperature_isobaric'],
            data_with_variables['Relative_humidity_isobaric'])

        # Back to a pandas dataframe, as the plt_skew() metpy function suggests
        return pd.DataFrame(data_with_variables)
Ejemplo n.º 2
0
def test_pandas_units_on_dataframe_not_all_united():
    """Units attribute naming only one column: the other column stays unitless."""
    frame = pd.DataFrame(columns=['cola', 'colb'], data=[[1, 4], [2, 5], [3, 6]])
    frame.units = {'cola': 'kilometers'}
    united = pandas_dataframe_to_unit_arrays(frame)
    # 'colb' has no entry in the units attribute, so a bare array is expected
    expected = {
        'cola': np.array([1, 2, 3]) * units.km,
        'colb': np.array([4, 5, 6]),
    }
    for column, truth in expected.items():
        assert_array_equal(united[column], truth)
Ejemplo n.º 3
0
def test_pandas_units_on_dataframe():
    """Units are picked up from the dataframe's own ``units`` attribute."""
    frame = pd.DataFrame(columns=['cola', 'colb'], data=[[1, 4], [2, 5], [3, 6]])
    frame.units = {'cola': 'kilometers', 'colb': 'degC'}
    united = pandas_dataframe_to_unit_arrays(frame)
    expected = {
        'cola': np.array([1, 2, 3]) * units.km,
        'colb': np.array([4, 5, 6]) * units.degC,
    }
    for column, truth in expected.items():
        assert_array_equal(united[column], truth)
Ejemplo n.º 4
0
def test_pandas_units_simple():
    """Units passed explicitly through ``column_units`` are attached to both columns."""
    frame = pd.DataFrame(columns=['cola', 'colb'], data=[[1, 4], [2, 5], [3, 6]])
    united = pandas_dataframe_to_unit_arrays(
        frame, column_units={'cola': 'kilometers', 'colb': 'degC'})
    expected = {
        'cola': np.array([1, 2, 3]) * units.km,
        'colb': np.array([4, 5, 6]) * units.degC,
    }
    for column, truth in expected.items():
        assert_array_equal(united[column], truth)
Ejemplo n.º 5
0
def attach_units(df):
    """
    Attach units to the PRES, TEMP and HUM columns for metpy calculation.

    returns:
    a dict (not a dataframe) mapping 'PRES', 'TEMP' and 'HUM' to unit arrays
    in hPa, degC and percent respectively
    """
    column_units = {'PRES': 'hPa', 'TEMP': 'degC', 'HUM': 'percent'}

    # Rebuild a frame from the raw values of just the three columns of interest
    subset = pd.DataFrame(
        data={name: df[name].values for name in column_units})

    return pandas_dataframe_to_unit_arrays(subset, column_units=column_units)
Ejemplo n.º 6
0
def test_pandas_units_no_units_given():
    """A dataframe with no unit information at all must be rejected with ValueError."""
    unitless = pd.DataFrame(columns=['cola', 'colb'],
                            data=[[1, 4], [2, 5], [3, 6]])
    with pytest.raises(ValueError):
        pandas_dataframe_to_unit_arrays(unitless)
Ejemplo n.º 7
0
station = 'MPX'
dt = datetime(2016, 10, 26, 12)

######################################################################
# Grab Remote Data
# ----------------
#
# Fetching the sounding needs an internet connection: the data comes from a
# remote server at the University of Wyoming.
#

# Request the sounding for the chosen time (dt) and station
df = WyomingUpperAir.request_data(dt, station)

# Turn the dataframe into a dictionary of united arrays
data = pandas_dataframe_to_unit_arrays(df)

######################################################################
# Isolate variables and attach units
#

# Pull the individual united arrays out of the dictionary in one step
p, T, Td, u, v = (
    data[name]
    for name in ('pressure', 'temperature', 'dewpoint', 'u_wind', 'v_wind')
)

######################################################################
# Make Skew-T Plot
# ----------------
Ejemplo n.º 8
0
def text_file_parse(file,
                    year=datetime.now().year,
                    month=datetime.now().month):
    """ Takes a text file taken from the NOAA PORT system containing
    METAR data and creates a dataframe with all the observations

    parameters
    ----------
    file: string
          The path to the file containing the data. It should be extracted
          from NOAA PORT and NOT be in binary format
    year: int
          Year passed on to the METAR parser for every observation.
          NOTE: the default is evaluated once, when this function is defined,
          not on every call.
    month: int
          Month passed on to the METAR parser; same default caveat as `year`.

    return
    ---------
    df : pandas dataframe with the station id as the index. The column units
         are stored on the dataframe as the `units` attribute.

    """
    import pandas as pd
    import numpy as np
    from metar_decode import ParseError
    from metar_parse import parse_metar_to_named_tuple
    from process_stations import station_dict
    from calculations import altimeter_to_slp
    from metpy.units import units, pandas_dataframe_to_unit_arrays

    # Join continuation lines (those starting with `key`) onto the METAR
    # they belong to, yielding one string per observation
    def merge(lines, key='     '):
        pending = []
        for line in lines:
            if line.startswith(key):
                # strip exactly the continuation prefix, whatever its length
                line = line[len(key):]
            elif pending:
                yield ' '.join(pending)
                pending = []
            pending.append(line)
        if pending:
            yield ' '.join(pending)

    # Read the whole file; the context manager guarantees the handle is closed
    with open(file) as myfile:
        value = myfile.read().rstrip()

    # Split into non-empty lines and merge continuation lines
    list_values = [line for line in value.split(sep='\n') if line]

    # Drop the short lines that do not contain METAR observations or contain
    # METAR observations that lack a robust amount of data
    metars = [metar for metar in merge(list_values) if len(metar) > 25]

    # Create a dictionary with all the station name, locations, and elevations
    master = station_dict()

    # Every dataframe column has the same name as the attribute it is read
    # from on the parsed METAR, so one ordered tuple drives both.
    columns = (
        'station_id', 'latitude', 'longitude', 'elevation', 'date_time',
        'wind_direction', 'wind_speed',
        'current_wx1', 'current_wx2', 'current_wx3',
        'skyc1', 'skylev1', 'skyc2', 'skylev2',
        'skyc3', 'skylev3', 'skyc4', 'skylev4',
        'cloudcover', 'temperature', 'dewpoint', 'altimeter',
        'current_wx1_symbol', 'current_wx2_symbol', 'current_wx3_symbol',
    )
    data = {name: [] for name in columns}

    for raw in metars:
        try:
            parsed = parse_metar_to_named_tuple(raw,
                                                master,
                                                year=year,
                                                month=month)
        except ParseError:
            # Unparsable observations are skipped on purpose
            continue
        for name in columns:
            data[name].append(getattr(parsed, name))

    col_units = {
        'station_id': None,
        'latitude': 'degrees',
        'longitude': 'degrees',
        'elevation': 'meters',
        'date_time': None,
        'wind_direction': 'degrees',
        'wind_speed': 'kts',
        'current_wx1': None,
        'current_wx2': None,
        'current_wx3': None,
        'skyc1': None,
        'skylev1': 'feet',
        'skyc2': None,
        'skylev2': 'feet',
        'skyc3': None,
        'skylev3': 'feet',
        'skyc4': None,
        # was the mistyped key 'skylev4:' which matched no column
        'skylev4': 'feet',
        'cloudcover': None,
        'temperature': 'degC',
        'dewpoint': 'degC',
        'altimeter': 'inHg',
        'sea_level_pressure': 'hPa',
        'current_wx1_symbol': None,
        'current_wx2_symbol': None,
        'current_wx3_symbol': None,
    }

    df = pd.DataFrame(data, index=data['station_id'])

    try:
        df['sea_level_pressure'] = altimeter_to_slp(
            data['altimeter'] * units('inHg'),
            data['elevation'] * units('meters'),
            data['temperature'] * units('degC')).magnitude
    except Exception:
        # Best effort: keep the observations even if the pressure reduction
        # fails. A scalar broadcasts to any number of rows (a one-element
        # list only fits a one-row frame).
        df['sea_level_pressure'] = np.nan

    # Drop duplicates
    df = df.drop_duplicates(subset=['date_time', 'latitude', 'longitude'],
                            keep='last')

    df['altimeter'] = df.altimeter.round(2)
    df['sea_level_pressure'] = df.sea_level_pressure.round(2)

    df.index = df.station_id

    # Set the units for the dataframe
    df.units = col_units
    # NOTE(review): the returned dict of unit arrays is discarded; presumably
    # the call is kept as a check that the units can be attached - confirm.
    pandas_dataframe_to_unit_arrays(df)

    return df
Ejemplo n.º 9
0
    def as_dataframe(self,
                     index=0,
                     date=None,
                     relativeTime=False,
                     relativeElevation=False,
                     asUnitArray=False):
        """Flatten one GeoJSON sounding into a table with one row per feature.

        :param index: forwarded to ``self.as_geojson``
        :param date: forwarded to ``self.as_geojson``
        :param relativeTime: if true, 'time' is the offset from the first
            feature's time; otherwise it is a timezone-aware UTC datetime
        :param relativeElevation: if true, 'elevation' is relative to the
            first feature's elevation
        :param asUnitArray: if true, return a dict of unit arrays instead of
            a dataframe
        :return: tuple ``(data, properties)`` where ``properties`` is the
            GeoJSON top-level properties dict with 'station_name' added
        """
        gj = self.as_geojson(index=index, date=date)

        # pre-version 2 format files had Pa instead of hPa,
        # and no "fmt" attribute
        # normalize on hPa:
        pscale = 1. if "fmt" in gj["properties"] else 100.

        t0 = gj["features"][0]['properties']['time']
        e0 = gj["features"][0]['geometry']['coordinates'][2]

        # (dataframe column, feature property key)
        property_columns = [
            ('pressure', 'pressure'), ('gpheight', 'gpheight'),
            ('temperature', 'temp'), ('dewpoint', 'dewpoint'),
            ('u_wind', 'wind_u'), ('v_wind', 'wind_v'),
            ('latitude', 'latitude'), ('longitude', 'longitude'),
        ]
        nan = float('nan')

        flat = []
        for f in gj["features"]:
            coords = f.geometry.coordinates
            v = {
                'longitude': coords[0],
                'latitude': coords[1],
                'elevation': coords[2] - e0 if relativeElevation else coords[2],
            }

            for column, key in property_columns:
                try:
                    v[column] = f.properties[key]
                except KeyError:
                    # Missing property: keep a value already taken from the
                    # geometry (latitude/longitude), otherwise record NaN.
                    v.setdefault(column, nan)

            if relativeTime:
                v['time'] = f.properties['time'] - t0
            else:
                v['time'] = datetime.fromtimestamp(f.properties['time'],
                                                   tz=pytz.utc)

            # Scale the value gathered above so that a feature without a
            # pressure yields NaN instead of aborting the whole conversion.
            v['pressure'] = v['pressure'] / pscale
            flat.append(v)

        col_names = [
            'pressure', 'gpheight', 'temperature', 'dewpoint', 'u_wind',
            'v_wind', 'time', 'latitude', 'longitude', 'elevation'
        ]
        df = pd.DataFrame(flat, columns=col_names)
        column_units = {
            'pressure': 'hPa',
            'gpheight': 'meter',
            'temperature': 'kelvin',
            'dewpoint': 'kelvin',
            'u_wind': 'm/s',
            'v_wind': 'm/s',
            'time': None,
            'latitude': 'degrees',
            'longitude': 'degrees',
            'elevation': 'meter'
        }
        gj["properties"]["station_name"] = self.station_name

        if asUnitArray:
            return (pandas_dataframe_to_unit_arrays(
                df, column_units=column_units), gj["properties"])
        return (df, gj["properties"])
Ejemplo n.º 10
0
def _metar_wx_symbol(code):
    """Return the integer symbol for a present-weather code, or NaN if unmapped."""
    if not isinstance(code, str):
        return np.nan
    try:
        return int(wx_code_map[code])
    except (KeyError, TypeError, ValueError):
        return np.nan


def _metar_sky_layer(groups, position):
    """Return (cover, height in feet) for the sky group at *position*, else (NaN, NaN)."""
    try:
        group = groups[position]
        return group[0:3], float(group[3:]) * 100
    except (IndexError, ValueError):
        return np.nan, np.nan


def _metar_signed_temperature(node, field):
    """Decode the two-digit temperature in ``node.<field>.text``; 'M' marks a negative value."""
    try:
        text = getattr(node, field).text
        value = float(text[-2:])
    except (AttributeError, TypeError, ValueError):
        return np.nan
    return -1 * value if "M" in text else value


def parse_metar_to_pandas(metar_text, year=datetime.now().year, month=datetime.now().month):
    """Takes in a single METAR, in text form, and creates a one-row pandas
    dataframe that can be easily subset

    Input:
    metar_text = string with the METAR data
    year = year used to build the observation datetime
    month = month used to build the observation datetime
        NOTE: both defaults are evaluated once, when the function is defined,
        not on every call.

    Output:
    Pandas Dataframe indexed by station id with the columns

        [station_id, latitude, longitude, elevation, date_time,
        wind_direction, wind_speed, current_wx1, current_wx2, current_wx3,
        skyc1, skylev1, skyc2, skylev2, skyc3, skylev3, skyc4, skylev4,
        cloudcover, temperature, dewpoint, altimeter, current_wx1_symbol,
        current_wx2_symbol, current_wx3_symbol, sea_level_pressure]

    The column units are stored on the dataframe as the `units` attribute.
    """
    # Create a dictionary with all the station metadata
    station_metadata = station_dict()

    # Decode the data using the parser (built using Canopy)
    tree = parse(metar_text)

    # Station ID, Latitude, Longitude, and Elevation.
    # Location defaults to NaN so it is defined even without a site id.
    lat = lon = elev = np.nan
    if tree.siteid.text == '':
        station_id = [np.nan]
    else:
        site = tree.siteid.text.strip()
        station_id = [site]
        try:
            station = station_metadata[site]
            lat, lon, elev = (station.latitude, station.longitude,
                              station.altitude)
        except (KeyError, AttributeError):
            # Unknown station: keep the NaN location
            pass

    # Set the observation datetime
    if tree.datetime.text == '':
        date_time = np.nan
    else:
        day_time_utc = tree.datetime.text[:-1].strip()
        day = int(day_time_utc[0:2])
        hour = int(day_time_utc[2:4])
        minute = int(day_time_utc[4:6])
        date_time = datetime(year, month, day, hour, minute)

    # Set the wind variables
    if tree.wind.text in ('', '/////KT', ' /////KT', 'KT'):
        wind_dir = np.nan
        wind_spd = np.nan
    elif tree.wind.wind_dir.text in ('VRB', 'VAR'):
        # Variable wind has a speed but no direction
        wind_dir = np.nan
        wind_spd = float(tree.wind.wind_spd.text)
    else:
        wind_dir = int(tree.wind.wind_dir.text)
        wind_spd = int(tree.wind.wind_spd.text)

    # Set the weather codes and symbols (up to three groups)
    wx = [np.nan, np.nan, np.nan]
    if tree.curwx.text != '':
        tokens = tree.curwx.text.strip().split()
        wx[0:len(tokens)] = tokens
    current_wx1, current_wx2, current_wx3 = wx[0], wx[1], wx[2]
    current_wx1_symbol = _metar_wx_symbol(wx[0])
    current_wx2_symbol = _metar_wx_symbol(wx[1])
    # the third symbol comes from wx[2]; wx[3] does not exist
    current_wx3_symbol = _metar_wx_symbol(wx[2])

    # Set the sky conditions (up to four layers)
    sky_text = tree.skyc.text
    layers = [(np.nan, np.nan)] * 4
    if sky_text == '':
        pass
    elif sky_text[1:3] == 'VV':
        # Vertical visibility: same hundreds-of-feet encoding as cloud layers
        try:
            vv_height = float(sky_text.strip()[2:5]) * 100
        except ValueError:
            vv_height = np.nan
        layers[0] = ('VV', vv_height)
    else:
        groups = sky_text.strip().split()
        layers = [_metar_sky_layer(groups, i) for i in range(4)]
    (skyc1, skylev1), (skyc2, skylev2), (skyc3, skylev3), (skyc4, skylev4) = layers

    # Total cloud cover in oktas, taken from the densest layer reported
    if ('OVC' in sky_text) or ('VV' in sky_text):
        cloudcover = 8
    elif 'BKN' in sky_text:
        cloudcover = 6
    elif 'SCT' in sky_text:
        cloudcover = 4
    elif 'FEW' in sky_text:
        cloudcover = 2
    elif any(code in sky_text for code in ('SKC', 'NCD', 'NSC', 'CLR')):
        # clear sky is zero oktas
        cloudcover = 0
    else:
        cloudcover = np.nan

    # Set the temperature and dewpoint
    if tree.temp_dewp.text in ('', ' MM/MM'):
        temp = np.nan
        dewp = np.nan
    else:
        temp = _metar_signed_temperature(tree.temp_dewp, 'temp')
        dewp = _metar_signed_temperature(tree.temp_dewp, 'dewp')

    # Set the altimeter value (stored in inHg)
    if tree.altim.text == '':
        altim = np.nan
    else:
        reading = tree.altim.text.strip()[1:5]
        if float(reading) > 1100:
            # hundredths of inHg
            altim = float(reading) / 100
        else:
            # whole hPa, converted to inHg
            altim = (int(reading) * units.hPa).to('inHg').magnitude

    # Keys match the dataframe columns below so the units can be looked up
    col_units = {
        'station_id': None,
        'latitude': 'degrees',
        'longitude': 'degrees',
        'elevation': 'meters',
        'date_time': None,
        'wind_direction': 'degrees',
        'wind_speed': 'kts',
        'current_wx1': None,
        'current_wx2': None,
        'current_wx3': None,
        'skyc1': None,
        'skylev1': 'feet',
        'skyc2': None,
        'skylev2': 'feet',
        'skyc3': None,
        'skylev3': 'feet',
        'skyc4': None,
        'skylev4': 'feet',
        'cloudcover': None,
        'temperature': 'degC',
        'dewpoint': 'degC',
        'altimeter': 'inHg',
        'sea_level_pressure': 'hPa',
        'current_wx1_symbol': None,
        'current_wx2_symbol': None,
        'current_wx3_symbol': None,
    }

    df = pd.DataFrame(
        {
            'station_id': station_id,
            'latitude': lat,
            'longitude': lon,
            'elevation': elev,
            'date_time': date_time,
            'wind_direction': wind_dir,
            'wind_speed': wind_spd,
            'current_wx1': current_wx1,
            'current_wx2': current_wx2,
            'current_wx3': current_wx3,
            'skyc1': skyc1,
            'skylev1': skylev1,
            'skyc2': skyc2,
            'skylev2': skylev2,
            'skyc3': skyc3,
            'skylev3': skylev3,
            'skyc4': skyc4,
            'skylev4': skylev4,
            'cloudcover': cloudcover,
            'temperature': temp,
            'dewpoint': dewp,
            'altimeter': altim,
            'current_wx1_symbol': current_wx1_symbol,
            'current_wx2_symbol': current_wx2_symbol,
            'current_wx3_symbol': current_wx3_symbol,
        },
        index=station_id)

    try:
        df['sea_level_pressure'] = float(format(altimeter_to_slp(
            altim * units('inHg'),
            elev * units('meters'),
            temp * units('degC')).magnitude, '.1f'))
    except Exception:
        # Best effort: keep the observation even if the reduction fails
        df['sea_level_pressure'] = np.nan

    df['altimeter'] = df.altimeter.round(2)
    df['sea_level_pressure'] = df.sea_level_pressure.round(2)

    df.index = df.station_id

    # Set the units for the dataframe
    df.units = col_units
    # NOTE(review): the returned dict of unit arrays is discarded; presumably
    # the call is kept as a check that the units can be attached - confirm.
    pandas_dataframe_to_unit_arrays(df)

    return df