def fetch_and_write_weather(last_timestamp, last_hourly_measurement):
    """Fetch hourly KNMI data for station 279 and upload it as a tarred CSV.

    The data is requested through ``knmy.get_hourly_data`` for the window
    between ``last_hourly_measurement`` and ``last_timestamp``, converted to
    CSV, wrapped in an in-memory tar archive holding a single file, and
    handed to ``spark_master.put_archive`` under ``/opt/spark-data/weather``.

    Args:
        last_timestamp: End of the requested window. One hour is subtracted
            from it with a ``timedelta``, so presumably a ``datetime``.
        last_hourly_measurement: Start of the requested window; handled the
            same way as ``last_timestamp``.
    """
    # Shift the requested window back by one hour to correct for the way
    # knmy handles datetimes.
    hour_offset = timedelta(hours=1)
    _, _, _, knmi_df = knmy.get_hourly_data(
        stations=[279],
        start=last_hourly_measurement - hour_offset,
        end=last_timestamp - hour_offset,
        parse=True)
    # Drop the first row, which contains a duplicate header.
    knmi_df = knmi_df.drop(knmi_df.index[0])

    # Merge the date column and the hour column into one timestamp column.
    knmi_df["timestamp"] = [
        parse(date) + timedelta(hours=int(hour))
        for date, hour in zip(knmi_df["YYYYMMDD"], knmi_df["HH"])
    ]
    knmi_df = knmi_df.drop(["STN", "YYYYMMDD", "HH"], axis=1)

    weather_string = knmi_df.to_csv(sep=",",
                                    date_format="%Y-%m-%d %H:%M:%S",
                                    index=False)

    filename = ("weather" + to_writeable_timestamp(last_hourly_measurement) +
                "-to-" + to_writeable_timestamp(last_timestamp) + ".csv")

    file_data = weather_string.encode('utf8')
    tarinfo = tarfile.TarInfo(name=filename)
    tarinfo.size = len(file_data)
    tarinfo.mtime = time.time()

    # Build the archive in memory; the context manager guarantees the
    # archive is closed even if adding the member fails.
    tarstream = BytesIO()
    with tarfile.TarFile(fileobj=tarstream, mode='w') as tar:
        tar.addfile(tarinfo, BytesIO(file_data))

    # Rewind so the consumer reads the archive from its first byte.
    tarstream.seek(0)
    spark_master.put_archive("/opt/spark-data/weather", tarstream)
コード例 #2
0
def test_get_hourly_data():
    """Fetch hourly data for station 209 and compare it to the stored reference."""
    query = {"stations": [209], "start": 2017010101, "end": 2017010524}
    fetched = get_hourly_data(**query)
    expected = test_data['hourly']
    output_comparison(fetched, expected)
コード例 #3
0
from knmy.knmy import get_hourly_data, get_daily_rain_data
from knmy.parser import parse_raw_weather_data, parse_raw_rain_data

# Fetch the raw hourly data once, at import time; the tests below compare the
# parsed pieces against reference files.
raw_data = get_hourly_data([209, 257], start=2017010101, end=2017010524)

# Unpack the parser's result into the four module-level names used by the tests.
disclaimer, stations, variables, data = parse_raw_weather_data(raw_data)


def test_disclaimer():
    """Check the parsed disclaimer against the stored reference text."""
    reference_path = 'tests/test_data/disclaimer.txt'
    with open(reference_path) as reference_file:
        expected = reference_file.read()

    assert disclaimer == expected


def test_stations():
    """Check the CSV rendering of the parsed stations against the reference file."""
    actual_csv = stations.to_csv()
    reference_path = 'tests/test_data/stations.csv'
    with open(reference_path) as reference_file:
        expected_csv = reference_file.read()

    assert actual_csv == expected_csv


def test_variables():
    """Check the string form of the parsed variables against the reference file."""
    reference_path = 'tests/test_data/variables.txt'
    with open(reference_path) as reference_file:
        raw_reference = reference_file.read()

    # Leading/trailing newlines in the reference file are not significant.
    expected = raw_reference.strip('\n')
    assert str(variables) == expected


def test_data():
コード例 #4
0
#%% Loading data
from knmy import knmy
import knmi
#%% Showing essential attributes and structure of the data
# Look at the stations and variables attributes of the knmi module. These are
# bare expressions: they only display output when run as an interactive cell.
knmi.stations
knmi.variables

# Hoogeveen is the closest station; its station number is 615.
# In the current API, the station number of Hoogeveen is 279.

#%% Get data through API
# Request one hour of parsed data for station 279.
disclaimer, stations, variables, knmi_df = knmy.get_hourly_data(stations=[279], start=2017010112, end=2017010113, parse=True)

# DataFrame.drop returns a new frame rather than modifying in place, so the
# result must be assigned back for the first row to actually be removed.
# NOTE(review): the first row is presumably a duplicate header row - confirm.
knmi_df = knmi_df.drop(knmi_df.index[0])

knmi_df