def get_noaa_data(start_time, end_time):
    """Fetch NOAA ISD weather observations between start_time and end_time.

    Pulls the selected columns via NoaaIsdWeather, filters the rows to the
    stations listed in the module-level ``usaf_list`` (assumed defined
    elsewhere in this module — TODO confirm), and returns the filtered
    frame with a fresh 0..n-1 index.

    :param start_time: start of the date range (datetime).
    :param end_time: end of the date range (datetime).
    :return: filtered pandas DataFrame of weather rows.
    """
    columns = [
        'usaf', 'wban', 'datetime', 'latitude', 'longitude', 'elevation',
        'windAngle', 'windSpeed', 'temperature', 'stationName', 'p_k'
    ]
    isd = NoaaIsdWeather(start_time, end_time, cols=columns)
    noaa_df = isd.to_pandas_dataframe()
    df_filtered = noaa_df[noaa_df["usaf"].isin(usaf_list)]
    # Bug fix: reset_index returns a new DataFrame (it is not in-place);
    # the original discarded the result, leaving the old index in place.
    df_filtered = df_filtered.reset_index(drop=True)
    print("Received {0} rows of training data between {1} and {2}".format(
        df_filtered.shape[0], start_time, end_time))
    return df_filtered
    def __get_weather_data_for_day(self, day, lat, lon):
        '''
        Fetch NOAA ISD weather data for a single day, filter it to the
        station nearest to (lat, lon), and push the result to eventhub.

        :param day: datetime for the day to fetch; used as both start and
            end of the NoaaIsdWeather range.
        :param lat: target latitude used to select the nearest station.
        :param lon: target longitude used to select the nearest station.
        :raises JobError: any underlying failure is re-raised wrapped as
            JobConstants.INTERNAL_ERROR, after recording a failed job
            status when FLAGS.job_status_blob_sas_url is set.
        '''
        try:
            # get data for given date range.
            start_time = time.time()
            LOG.info("Getting data for " + day.strftime("%m/%d/%Y, %H:%M:%S"))
            weather_data = NoaaIsdWeather(day, day)
            LOG.info("Successfully got data for " + day.strftime("%m/%d/%Y, %H:%M:%S"))

            # get the data into a pandas data frame, so we can filter and process
            weather_data_df = weather_data.to_pandas_dataframe()
            LOG.info("Took {} seconds to get the data.".format(time.time() - start_time))

            # out of the lat longs available get the nearest points
            LOG.info("Finding the nearest latitude and longitude from the available data")
            (nearest_lat, nearest_lon) = UtilFunctions.find_nearest_lat_longs_in_data(weather_data_df, lat, lon)
            LOG.info("nearest lat, lon: [" + str(nearest_lat) + "," + str(nearest_lon) + "]")

            # filter the data to this lat and lon
            # NOTE(review): exact float equality works here because nearest_lat /
            # nearest_lon are presumably taken from the frame's own values — confirm
            # in UtilFunctions.find_nearest_lat_longs_in_data.
            LOG.info("Filtering the data to nearest lat, lon")
            filtered_weather_data = weather_data_df[(weather_data_df['latitude'] == nearest_lat) & (weather_data_df['longitude'] == nearest_lon)]
            LOG.info(filtered_weather_data)

            # push the data to eventhub
            LOG.info("Pushing data to eventhub")
            wdl_id = self.__push_weather_data_to_farmbeats(filtered_weather_data)
            LOG.info("Successfully pushed data")

            # Update the status for the job (only when a status blob URL was provided)
            if FLAGS.job_status_blob_sas_url:
                msg = "Weather data pushed for start_date: {} to end_date: {}\n for nearest_lat: {}, nearest_lon: {}\n provided lat:{}, lon:{}".format(
                    FLAGS.start_date, FLAGS.end_date, nearest_lat, nearest_lon, FLAGS.latitude, FLAGS.longitude)
                writer = JobStatusWriter(FLAGS.job_status_blob_sas_url)
                output_writer = writer.get_output_writer()
                output_writer.set_prop("WeatherDataLocationId: ", wdl_id)
                output_writer.set_prop("Message: ", msg)
                writer.set_success(True)
                writer.flush()

        except Exception as err:
            # Update the status in failure, then surface the error as a JobError
            # so the caller sees a uniform failure type.
            if FLAGS.job_status_blob_sas_url:
                writer = JobStatusWriter(FLAGS.job_status_blob_sas_url)
                writer.set_success(False)
                writer.flush()
            raise JobError(str(err), JobConstants.INTERNAL_ERROR, False)
# Example #3
 def __get_weather_data_for_date_range(self, start_date, end_date):
     '''
     Return a NoaaIsdWeather dataset covering the given date range.

     Bug fix: the original ignored its arguments and re-parsed
     FLAGS.start_date / FLAGS.end_date, so callers passing explicit
     dates silently got the FLAGS values instead.

     :param start_date: range start — a datetime, or a string parseable
         by dateutil's parser.
     :param end_date: range end — a datetime, or a parseable string.
     :return: a NoaaIsdWeather dataset for [start_date, end_date].
     '''
     # Accept either datetime objects or date strings (the FLAGS values
     # the original parsed were strings).
     if isinstance(start_date, str):
         start_date = parser.parse(start_date)
     if isinstance(end_date, str):
         end_date = parser.parse(end_date)
     return NoaaIsdWeather(start_date, end_date)
# Example #4
import os  # bug fix: os.makedirs is used below but os was never imported
from calendar import monthrange  # bug fix: monthrange was used but never imported
from datetime import datetime, timedelta

from azureml.core import Dataset, Datastore, Workspace
from azureml.opendatasets import NoaaIsdWeather

# get workspace and datastore
ws = Workspace.from_config()
dstore = ws.get_default_datastore()

# adjust parameters as needed
target_years = list(range(2010, 2020))
start_month = 1

# Download one month of NOAA ISD data at a time, keep only rows whose
# station name contains 'FLORIDA', and persist each month as parquet.
for year in target_years:
    for month in range(start_month, 12 + 1):
        path = 'weather-data/{}/{:02d}/'.format(year, month)
        try:
            start = datetime(year, month, 1)
            # Last day of the month plus one day, so the full month is covered.
            end = datetime(year, month,
                           monthrange(year, month)[1]) + timedelta(days=1)
            isd = NoaaIsdWeather(start, end).to_pandas_dataframe()
            isd = isd[isd['stationName'].str.contains('FLORIDA',
                                                      regex=True,
                                                      na=False)]
            os.makedirs(path, exist_ok=True)
            isd.to_parquet(path + 'data.parquet')
        except Exception as e:
            # Best effort: some months have no data upstream; report and
            # continue with the next month instead of aborting the run.
            print('Month {} in year {} likely has no data.\n'.format(
                month, year))
            print('Exception: {}'.format(e))
# Example #5
# imports
import pickle
from datetime import datetime
from azureml.opendatasets import NoaaIsdWeather
from sklearn.linear_model import LinearRegression

# Pull two weeks of NOAA ISD weather observations (Jan 1-14, 2019).
weather = NoaaIsdWeather(datetime(2019, 1, 1), datetime(2019, 1, 14))

# Materialize as a dataframe, zero-fill missing values, and keep only
# rows whose station name mentions FLORIDA.
frame = weather.to_pandas_dataframe().fillna(0)
florida_rows = frame['stationName'].str.contains('FLORIDA', regex=True, na=False)
frame = frame[florida_rows]

# Columns used as model inputs, and the prediction target.
X_features = ['latitude', 'longitude', 'temperature', 'windAngle', 'windSpeed']
y_features = ['elevation']

# Persist the training set as CSV for reproducibility.
training_dataset = frame[X_features + y_features]
training_dataset.to_csv('training.csv', index=False)

# Fit a simple linear model predicting elevation from the weather features.
model = LinearRegression().fit(training_dataset[X_features],
                               training_dataset[y_features])

# Serialize the fitted model to disk.
with open('elevation-regression-model.pkl', 'wb') as f:
    pickle.dump(model, f)