예제 #1
0
pp = pprint.PrettyPrinter(indent=2)

# Create the API client; replace the placeholder with YOUR own api_token
m = Meso(token='YOUR TOKEN')

# Station metadata, restricted to Larimer County, Colorado
stations = m.metadata(
    state='CO',
    county='Larimer',
)

# variables() lists every sensor variable that stations can report
variables = m.variables()

# Climatology for Denver covering Apr 26 00z through Apr 27 00z
climate = m.climatology(
    stid='kden',
    startclim='04260000',
    endclim='04270000',
    units='precip|in',
)

# Observation at Fort Collins airport within 30 min of Apr 26 18z
attime = m.attime(
    stid='kfnl',
    attime='201504261800',
    within='30',
)

# Most recent observation, limited to the last 15 minutes
latest = m.latest(
    stid='kfnl',
    within='15',
)

# Time series at Fort Collins airport, Apr 26 18z through Apr 26 23z
time = m.timeseries(
    stid='kfnl',
    start='201504261800',
    end='201504262300',
)

# Precipitation obs at Fort Collins airport, Apr 26 18z through Apr 27 12z
precip = m.precip(
    stid='kfnl',
    start='201504261800',
    end='201504271200',
    units='precip|in',
)

# networks() describes all of the networks in MesoWest
networks = m.networks()

# networktypes() lists the categories those networks belong to
nettypes = m.networktypes()
예제 #2
0
# Create the API client; replace the placeholder with YOUR own api_token
m = Meso(token='YOUR TOKEN')

# Station metadata, restricted to Larimer County, Colorado
stations = m.metadata(
    state='CO',
    county='Larimer',
)

# variables() lists every sensor variable that stations can report
variables = m.variables()

# Climatology for Denver covering Apr 26 00z through Apr 27 00z
climate = m.climatology(
    stid='kden',
    startclim='04260000',
    endclim='04270000',
    units='precip|in',
)

# Observation at Fort Collins airport within 30 min of Apr 26 18z
attime = m.attime(
    stid='kfnl',
    attime='201504261800',
    within='30',
)

# Most recent observation, limited to the last 15 minutes
latest = m.latest(
    stid='kfnl',
    within='15',
)

# Time series at Fort Collins airport, Apr 26 18z through Apr 26 23z
time = m.timeseries(
    stid='kfnl',
    start='201504261800',
    end='201504262300',
)

# Precipitation obs at Fort Collins airport, Apr 26 18z through Apr 27 12z
precip = m.precip(
    stid='kfnl',
    start='201504261800',
    end='201504271200',
    units='precip|in',
)

# networks() describes all of the networks in MesoWest
networks = m.networks()
class WeatherDataGetter:
    def __init__(self,
                 API_KEY,
                 station_metadata_file,
                 wildfire_occurences_file,
                 wildfire_weather_file,
                 year_threshold,
                 state_specified=None,
                 station_radius_threshold=10):
        """
        Creates the Meso client, loads station metadata and wildfire records,
        then preprocesses the wildfire records
        :param API_KEY: token handed to Meso
        :param station_metadata_file: CSV with station metadata. Read when it
            exists; otherwise it is created through get_station_metadata
        :param wildfire_occurences_file: CSV read into wildfires_df
        :param wildfire_weather_file: file that get_weather_data appends to
        :param year_threshold: earliest discovery year kept by
            preprocess_wildfires
        :param state_specified: optional value matched against the STATE
            column by preprocess_wildfires
        :param station_radius_threshold: upper bound applied to the 'miles'
            column in get_5_nearest
        """
        # Init meso
        self.meso = Meso(token=API_KEY)

        # Reuse the metadata file when it already exists, otherwise build it
        # from the API
        if os.path.isfile(station_metadata_file):
            self.station_metadata = pd.read_csv(station_metadata_file)
        else:
            self.station_metadata = self.get_station_metadata(
                station_metadata_file)
            if self.station_metadata is None:
                # get_station_metadata writes its result to the file; when it
                # hands nothing back, load what it just wrote so that
                # station_metadata is never left as None
                self.station_metadata = pd.read_csv(station_metadata_file)

        # Class Variables
        self.wildfires_df = pd.read_csv(wildfire_occurences_file)
        # NOTE: the misspelled attribute name is kept on purpose;
        # preprocess_wildfires reads self.year_thresold
        self.year_thresold = year_threshold
        self.state_specified = state_specified
        self.wildfire_weather_file = wildfire_weather_file
        self.station_radius_threshold = station_radius_threshold

        self.preprocess_wildfires()

    def get_5_nearest(self, lat, lon, n):
        """
        Takes position in the form of lat, lon and returns n nearest mesowest stations
        :param lat: latitude
        :param lon: longitude
        :param n: number of stations
        :return: n nearest stations
        """

        all_points = self.station_metadata[['latitude', 'longitude']]
        self.station_metadata['distance'] = cdist([(lat, lon)], all_points).T

        n_smallest = self.station_metadata.nsmallest(n=n, columns='distance')
        n_smallest['miles'] = [
            haversine(lon, lat, row['longitude'], row['latitude'])
            for _, row in n_smallest.iterrows()
        ]

        n_smallest = n_smallest[
            n_smallest['miles'] <= self.station_radius_threshold]

        return n_smallest['STID'].tolist()

    def preprocess_wildfires(self):
        """
        Preprocesses wildfires database, filtering by year_threshold and state (if specified)
        :return: None
        """
        # Convert dates to datetimes. Add an year column
        self.wildfires_df['DISCOVERY_DATE'] = pd.to_datetime(
            self.wildfires_df['DISCOVERY_DATE'])
        self.wildfires_df['CONT_DATE'] = pd.to_datetime(
            self.wildfires_df['CONT_DATE'])
        self.wildfires_df['year'] = pd.DatetimeIndex(
            self.wildfires_df['DISCOVERY_DATE']).year

        # Filter to all records greater than an year and sort row in descending order by date
        self.wildfires_df = self.wildfires_df[
            self.wildfires_df['DISCOVERY_DATE'].dt.year >=
            self.year_thresold].sort_values(by=['DISCOVERY_DATE'],
                                            ascending=False)

        # Filter to only a particular state
        if self.state_specified:
            self.wildfires_df = self.wildfires_df[self.wildfires_df['STATE'] ==
                                                  self.state_specified]

    def get_station_metadata(self, file_name):
        """
        Gets all stations metadata from mesowest API and saves it in a file
        :param file_name: file where station data should be stored
        :return: None
        """
        vars = ['air_temp', 'relative_humidity', 'wind_speed', 'precip_accum']
        metadata = self.meso.metadata(country='us',
                                      status='ACTIVE',
                                      var=vars,
                                      obrange='20110101, 20160101')

        out = []
        for i in range(len(metadata['STATION'])):
            try:
                out.append([
                    metadata['STATION'][i]['STID'],
                    metadata['STATION'][i]['LATITUDE'],
                    metadata['STATION'][i]['LONGITUDE']
                ])
            except:
                pass
        df = pd.DataFrame(out, columns=['STID', 'latitude', 'longitude'])
        df.to_csv(file_name)

    def get_weather_data(self):
        """
        Purpose is to get weather data from nearby stations of a fire
        :return: None
        """

        # TODO: Add last_n_days query functionality
        radius = '10'
        last_n_days = 7

        cnt = 0  # Keeps cnt of rows. Used to periodically write rows to file
        stations_data = [
        ]  # Stores data for a few stations till it is written to file
        header = True  # For dataframe header
        write_rows_threshold = 100

        columns = [
            'FOD_ID', 'STID', 'distance', 'date_time', 'air_temperature',
            'relative_humidity', 'wind_speed', 'precipitation'
        ]

        for index, row in self.wildfires_df.iterrows():
            if cnt % write_rows_threshold == 0:  # Every write_rows_threshold rows, write to file
                df = pd.DataFrame(stations_data, columns=columns)
                with open(self.wildfire_weather_file, 'a') as f:
                    df.to_csv(f, header=header)
                    header = False
                stations_data = []

            #
            start = (row['DISCOVERY_DATE'] - timedelta(days=0)).replace(
                hour=12, minute=0).strftime("%Y%m%d%H%M")
            lat = row['LATITUDE']
            lon = row['LONGITUDE']
            days = [start]

            station_data = self.get_weather_data_api(lat,
                                                     lon,
                                                     radius,
                                                     row['FOD_ID'],
                                                     days,
                                                     stids=self.get_5_nearest(
                                                         lat=float(lat),
                                                         lon=float(lon),
                                                         n=5))
            print(cnt)
            if station_data:
                stations_data.extend(station_data)
            cnt += 1

    def get_weather_data_api(self, lat, lon, radius, wildfire_ID, days, stids):
        """
        Queries mesowest API for given lat, lon and radius. Given nearby stations
        :param lat: optional, if stid provided
        :param lon: optional, if stid provided
        :param radius: optional, if stid provided
        :param wildfire_ID:
        :param days: TODO: for n days. Currently will only query for a single day
        :param stids: STIDs
        :return: station data
        """
        radius_param = str(lat) + ',' + str(lon) + ',' + str(radius)
        vars = ['air_temp', 'relative_humidity', 'wind_speed', 'precip_accum']

        # Get data for a locations with sepcified radius
        data = []
        try:
            for day in days:
                day_data = self.meso.attime(radius=radius_param,
                                            attime=day,
                                            within=60,
                                            stid=stids,
                                            vars=vars)
                if not day_data:
                    return None

                data.append(day_data)
        except Exception as e:
            print(e)
            return

        if not data:
            return

        # Query data to generate station_df which would have the following columns:
        # STID, Date_time, Var1, Var2...
        # We could omit all stations which do not have all the values
        # Currently, keeping all stations irrespective of their missing values
        # Each occurence of a date_time val is supposed to be a row.
        # Here since, we are only querying attime, we will have only 1 date_time field
        vars_data = [
            'date_time', 'air_temp_value_1', 'relative_humidity_value_1',
            'wind_speed_value_1', 'precip_accum_value_1'
        ]

        stations = [station['STID'] for station in data[0]['STATION']]
        station_data = []

        # We are storing station ID from first day's data.
        # Subsequently querying other days data for these stations and storing them
        # TODO: Optimize below for loop
        for station_id in stations:
            for day_idx in range(len(data)):
                if data[day_idx]:
                    temp = []
                    for station in data[day_idx]['STATION']:
                        if station['STID'] == station_id:
                            row = [
                                wildfire_ID, station['STID'],
                                station['DISTANCE']
                            ]
                            for var in vars_data:
                                if var == 'date_time':
                                    row.append(days[day_idx])
                                elif var in station['OBSERVATIONS']:
                                    row.append(
                                        station['OBSERVATIONS'][var]['value'])
                                else:
                                    row.append('')
                            temp.append(row)
                    station_data.extend(temp)

        return station_data