def fetch_and_write_weather(last_timestamp, last_hourly_measurement):
    """Fetch hourly KNMI data and hand it to ``spark_master`` as a tarred CSV.

    Hourly data for station 279 is requested through ``knmy`` for the window
    from ``last_hourly_measurement`` to ``last_timestamp``, converted to CSV,
    wrapped in an in-memory tar archive holding that single file, and passed
    to ``spark_master.put_archive`` with target ``/opt/spark-data/weather``.

    Args:
        last_timestamp: End of the requested window. Must support subtracting
            a ``timedelta``.
        last_hourly_measurement: Start of the requested window. Must support
            subtracting a ``timedelta``.

    Returns:
        None. The only effect is the ``put_archive`` call.
    """
    # Both ends of the window are shifted back by one hour to compensate for
    # the way the knmy function handles datetimes.
    hour_offset = timedelta(hours=1)
    _, _, _, knmi_df = knmy.get_hourly_data(
        stations=[279],
        start=last_hourly_measurement - hour_offset,
        end=last_timestamp - hour_offset,
        parse=True)

    # Drop the first row, which contains a duplicate header.
    knmi_df = knmi_df.drop(knmi_df.index[0])

    # Combine the date column and the hour column into one timestamp column.
    knmi_df["timestamp"] = [
        parse(date) + timedelta(hours=int(hour))
        for date, hour in zip(knmi_df["YYYYMMDD"], knmi_df["HH"])
    ]
    knmi_df = knmi_df.drop(["STN", "YYYYMMDD", "HH"], axis=1)

    weather_string = knmi_df.to_csv(sep=",",
                                    date_format="%Y-%m-%d %H:%M:%S",
                                    index=False)
    filename = "weather{}-to-{}.csv".format(
        to_writeable_timestamp(last_hourly_measurement),
        to_writeable_timestamp(last_timestamp))

    file_data = weather_string.encode('utf8')
    tarinfo = tarfile.TarInfo(name=filename)
    tarinfo.size = len(file_data)
    tarinfo.mtime = time.time()

    tarstream = BytesIO()
    # The context manager closes the archive on every path, so a failure in
    # addfile no longer leaves the TarFile open.
    with tarfile.TarFile(fileobj=tarstream, mode='w') as tar:
        tar.addfile(tarinfo, BytesIO(file_data))

    # Rewind so put_archive reads the archive from its first byte.
    tarstream.seek(0)
    spark_master.put_archive("/opt/spark-data/weather", tarstream)
def test_get_hourly_data():
    """Fetch hourly data for station 209 and check it against the reference.

    The result of ``get_hourly_data`` is passed to ``output_comparison``
    together with the ``'hourly'`` entry of ``test_data``.
    """
    request = {
        "stations": [209],
        "start": 2017010101,
        "end": 2017010524,
    }
    fetched = get_hourly_data(**request)
    expected = test_data['hourly']
    output_comparison(fetched, expected)
from knmy.knmy import get_hourly_data, get_daily_rain_data
from knmy.parser import parse_raw_weather_data, parse_raw_rain_data

# Fetch the raw hourly data for stations 209 and 257 once, at import time,
# and parse it into its four parts. The tests visible here compare the parsed
# parts with reference files under tests/test_data/.
raw_data = get_hourly_data([209, 257], start=2017010101, end=2017010524)
disclaimer, stations, variables, data = parse_raw_weather_data(raw_data)


def test_disclaimer():
    """Check the parsed disclaimer equals tests/test_data/disclaimer.txt."""
    with open('tests/test_data/disclaimer.txt') as f:
        disclaimer_reference = f.read()
    assert disclaimer == disclaimer_reference


def test_stations():
    """Check ``stations.to_csv()`` equals tests/test_data/stations.csv."""
    stations_string = stations.to_csv()
    with open('tests/test_data/stations.csv') as f:
        stations_reference = f.read()
    assert stations_string == stations_reference


def test_variables():
    """Check ``str(variables)`` equals tests/test_data/variables.txt.

    Leading and trailing newlines are stripped from the reference text before
    the comparison.
    """
    with open('tests/test_data/variables.txt') as f:
        variables_reference = f.read()
    assert str(variables) == variables_reference.strip('\n')


def test_data():
#%% Loading data
# Exploratory cell script: inspect the knmi package metadata, then fetch one
# short window of hourly data for station 279 through knmy.
from knmy import knmy
import knmi

#%% Showing essential attributes and structure of the data
#list stations:
knmi.stations
knmi.variables
#Hoogeveen is closest station, stationnumber 615
#In current API, station number of Hoogeveen is 279

#%% Get data through API
disclaimer, stations, variables, knmi_df = knmy.get_hourly_data(
    stations=[279], start=2017010112, end=2017010113, parse=True)
# DataFrame.drop returns a new frame instead of modifying in place, so the
# result must be assigned back; otherwise the first row is still present in
# the frame displayed below.
knmi_df = knmi_df.drop(knmi_df.index[0])
knmi_df