Beispiel #1
0
    def test_request_raises_bad_country(self):
        """request() rejects the bogus country code "lol123" with ValueError."""
        ab_client = airbase.AirbaseClient()

        # bogus code passed on its own via the ``country`` keyword
        with pytest.raises(ValueError):
            ab_client.request(country="lol123")

        # bogus code mixed into a list alongside "NL", passed positionally
        with pytest.raises(ValueError):
            ab_client.request(["NL", "lol123"])
Beispiel #2
0
 def test_download_metadata(self, tmpdir, metadata_response, capsys):
     """download_metadata() creates the target file with the expected body."""
     target = str(tmpdir / "meta.csv")
     airbase.AirbaseClient().download_metadata(target)

     assert os.path.exists(target)
     # the file content must equal the body of the metadata_response fixture
     with open(target) as handle:
         assert handle.read() == metadata_response.body
Beispiel #3
0
    def test_request_raises_bad_year(self):
        """request() rejects the years "1234" and "9999" for both bounds.

        Each call gets its own ``pytest.raises`` block: once a statement
        inside such a block raises, the remaining statements in it are
        never executed, so sharing one block would leave ``year_to``
        untested.
        """
        client = airbase.AirbaseClient()

        for bad_year in ("1234", "9999"):
            with pytest.raises(ValueError):
                client.request(year_from=bad_year)

            with pytest.raises(ValueError):
                client.request(year_to=bad_year)
Beispiel #4
0
    def test_request_pl(self):
        """The ``pl`` argument accepts one name or a list; unknown names raise."""
        ab_client = airbase.AirbaseClient()

        # a single pollutant name yields one shortpl entry
        single = ab_client.request(pl="NO")
        assert len(single.shortpl) == 1

        # a list of two names yields two entries
        pair = ab_client.request(pl=["NO", "NO3"])
        assert len(pair.shortpl) == 2

        # one unknown name in the list makes the whole request fail
        with pytest.raises(ValueError):
            ab_client.request(pl=["NO", "NO3", "Not a pl"])
Beispiel #5
0
    def test_init_connect_false(self, summary_response):
        """With connect=False the client is unusable until connect() is called."""
        lazy_attrs = ("all_countries", "all_pollutants", "pollutants_per_country")
        ab_client = airbase.AirbaseClient(connect=False)

        # before connecting, attribute access and request() raise AttributeError
        for attr_name in lazy_attrs:
            with pytest.raises(AttributeError):
                getattr(ab_client, attr_name)
        with pytest.raises(AttributeError):
            ab_client.request()

        ab_client.connect()

        # after connecting, everything is populated
        for attr_name in lazy_attrs:
            assert getattr(ab_client, attr_name) is not None
        assert ab_client.request() is not None
Beispiel #6
0
def main(output_file, retries, ignore_errors=False):
    """Check the entire AirBase database for broken links.

    Every CSV link returned by the client is probed with an HTTP HEAD
    request; each link answering 404 is appended to ``output_file``, one
    URL per line.

    :param output_file: path of the file receiving the bad links; an
        already existing file is removed first
    :param retries: how many times a failing HEAD request is retried
    :param ignore_errors: if True, a link whose request keeps failing is
        skipped; otherwise the last error is re-raised
    """

    print("Will output bad links to {}".format(output_file))

    client = ab.AirbaseClient()
    req = client.request(preload_csv_links=True)  # get links to all files
    links = req._csv_links

    # clear output file
    try:
        os.remove(output_file)
    except FileNotFoundError:
        pass

    # One shared Session re-uses HTTP connections; the context manager
    # closes it when the check is done.
    with requests.Session() as session:

        # Define inside main to re-use Session
        def is_404(url, r=retries):
            """Return True/False for 404/other, or None if ignored errors persist."""
            attempts_left = r
            while True:
                try:
                    response = session.head(url, timeout=1)
                    return response.status_code == 404
                except Exception:
                    # ``Exception`` rather than a bare except, so that
                    # KeyboardInterrupt/SystemExit are never retried or hidden
                    if attempts_left <= 0:
                        if ignore_errors:
                            return None
                        raise
                    attempts_left -= 1

        with ThreadPoolExecutor(
            REQUESTS_SESSION_CONNECTION_POOL_SIZE
        ) as executor:
            promises = executor.map(
                is_404, tqdm(links, desc="Creating queue")
            )

            total_bad = 0
            with tqdm(total=len(links), desc="Checking links") as pbar:
                # executor.map yields results in input order, so they can be
                # paired with their links directly
                for link, not_found in zip(links, promises):
                    pbar.update()

                    if not_found:
                        total_bad += 1
                        with open(output_file, "a") as h:
                            h.write(link + "\n")
                        pbar.set_description(f"{total_bad:,} bad links")
Beispiel #7
0
import pandas as pd
import numpy as np
import requests
import glob
import os
import airbase
# Module-level AirBase client, created as soon as this module is imported.
client = airbase.AirbaseClient()
from nuts_finder import NutsFinder
# Shared NutsFinder instance used by get_NutsCode for coordinate lookups.
# NOTE(review): year=2016 presumably selects the 2016 NUTS edition - confirm.
nf = NutsFinder(year=2016)


def get_NutsCode(row: pd.Series) -> list:
    """Look up the level-2 NUTS region for a station's coordinates.

    Args:
        row: record providing "station_latitude_deg" and
            "station_longitude_deg".

    Returns:
        ``[NUTS_ID, NUTS_NAME]`` of the lookup result whose ``LEVL_CODE``
        is 2, or ``[nan, nan]`` when the lookup fails or has no such entry.
    """
    try:
        result = nf.find(lat=row["station_latitude_deg"],
                         lon=row["station_longitude_deg"])
        levels = [r['LEVL_CODE'] for r in result]
        # list.index raises ValueError when no level-2 entry exists
        level2 = result[levels.index(2)]
        return [level2['NUTS_ID'], level2['NUTS_NAME']]
    except Exception:
        # Best-effort lookup: any ordinary failure maps to missing values.
        # ``Exception`` (not a bare except) keeps Ctrl-C / SystemExit working
        # when this runs over many rows.
        return [np.nan, np.nan]


def translate_stationCode(row: pd.Series) -> pd.Series:
    """Attach NUTS_ID / NUTS_NAME to a row based on its station code.

    The pair is read from ``airstations_nuts2_dict`` under the row's
    'station_european_code'. When that lookup fails, both fields are set
    to NaN instead.

    Args:
        row: record providing 'station_european_code'; modified in place.

    Returns:
        The same ``row`` object with 'NUTS_ID' and 'NUTS_NAME' set.
    """
    try:
        nuts_entry = airstations_nuts2_dict[row['station_european_code']]
        row['NUTS_ID'] = nuts_entry[0]
        row['NUTS_NAME'] = nuts_entry[1]
    except Exception:
        # Best-effort: unknown or malformed entries become missing values.
        # ``Exception`` (not a bare except) keeps Ctrl-C / SystemExit working
        # when this runs over many rows.
        row['NUTS_ID'] = np.nan
        row['NUTS_NAME'] = np.nan
    return row
Beispiel #8
0
def download_raw_data():
    """Download the AirBase CSV files of every country to ./data/airbase_data.

    For each country listed by the client, the CSV links for NO2, O3, PM10
    and SO2 with ``year_from=2015`` are requested. Every file that does not
    exist locally yet is fetched and re-written as a tab-separated file
    under ``./data/airbase_data/<country>/``. The downloads of one country
    are run through dask with the 'processes' scheduler; files that cannot
    be downloaded or decoded are reported on stdout and skipped.
    """
    print('Downloading the raw data.')
    # exist_ok avoids the race between a separate exists() check and creation
    os.makedirs('./data/airbase_data', exist_ok=True)

    client = airbase.AirbaseClient()

    for curr_country in client.all_countries:
        tt = time.time()

        os.makedirs('./data/airbase_data/' + curr_country, exist_ok=True)

        r = client.request(country=curr_country,
                           pl=['NO2', 'O3', 'PM10', 'SO2'],
                           year_from=2015,
                           preload_csv_links=True,
                           verbose=False)
        all_csv_links = r._csv_links
        print(f'{curr_country} | {len(all_csv_links):5d} csv files')

        # Defined per country on purpose: it closes over ``curr_country`` and
        # is executed by dask.compute before the loop moves on.
        def download_csv_link(url):
            """Fetch one CSV link and store it tab-separated, unless present."""
            filename = url[url.rfind('/') + 1:]
            fullpath = './data/airbase_data/' + curr_country + '/' + filename
            if os.path.exists(fullpath):
                return

            max_attempts = 6
            with requests.Session() as s:
                for attempt in range(1, max_attempts + 1):
                    try:
                        download = s.get(url)
                        break
                    except Exception:
                        if attempt == max_attempts:
                            print('Failed to download', url)
                            return
                        # brief pause before the next attempt
                        time.sleep(1)

            # Try UTF-8 first and fall back to UTF-16.
            try:
                decoded_content = download.content.decode('utf-8')
            except UnicodeDecodeError:
                try:
                    decoded_content = download.content.decode('utf-16')
                except UnicodeDecodeError:
                    print('Failed to decode.', url)
                    return

            rows = list(csv.reader(decoded_content.splitlines(),
                                   delimiter=','))

            # Re-write the comma-separated download as a tab-separated file.
            with open(fullpath, "w", newline="") as f:
                csv.writer(f, delimiter='\t').writerows(rows)

        parallel_output = [dask.delayed(download_csv_link)(url)
                           for url in all_csv_links]

        n_workers = 8  # Set this to the number of cpus you have.
        dask.compute(*parallel_output,
                     scheduler='processes',
                     num_workers=n_workers)
        print(f'{curr_country} | {int(time.time() - tt):5d} sec')
Beispiel #9
0
 def test_search_pl_limit(self):
     """search_pollutant("N", limit=1) returns exactly one result."""
     matches = airbase.AirbaseClient().search_pollutant("N", limit=1)
     assert len(matches) == 1
Beispiel #10
0
 def test_search_pl_no_result(self):
     """A query that matches nothing returns an empty list."""
     query = "Definitely not a pollutant"
     matches = airbase.AirbaseClient().search_pollutant(query)
     assert matches == []
Beispiel #11
0
 def test_search_pl_exact(self):
     """Searching for "NO3" puts the entry named "NO3" first."""
     top_hit = airbase.AirbaseClient().search_pollutant("NO3")[0]
     assert top_hit["pl"] == "NO3"
Beispiel #12
0
 def test_search_pl_shortest_first(self):
     """The first hit for "N" is no longer than the second or the last hit."""
     hits = airbase.AirbaseClient().search_pollutant("N")
     pl_names = [hit["pl"] for hit in hits]

     first_len = len(pl_names[0])
     assert first_len <= len(pl_names[1])
     assert first_len <= len(pl_names[-1])
Beispiel #13
0
 def test_request_not_pl_and_shortpl(self):
     """Passing both ``pl`` and ``shortpl`` to request() raises ValueError."""
     ab_client = airbase.AirbaseClient()
     conflicting_args = {"pl": "O3", "shortpl": "123"}
     with pytest.raises(ValueError):
         ab_client.request(**conflicting_args)
Beispiel #14
0
 def test_request_response_generated(self):
     """request() without arguments returns an AirbaseRequest instance."""
     response = airbase.AirbaseClient().request()
     assert isinstance(response, airbase.AirbaseRequest)
Beispiel #15
0
def client(withoutresponses):
    """Provide an AirbaseClient constructed with ``connect=True``."""
    connected_client = airbase.AirbaseClient(connect=True)
    return connected_client
Beispiel #16
0
 def test_saerch_pl_case_insensitive(self):
     """A lower-case query ("no3") still puts the entry "NO3" first."""
     top_hit = airbase.AirbaseClient().search_pollutant("no3")[0]
     assert top_hit["pl"] == "NO3"
Beispiel #17
0
 def test_init_connect(self, summary_response):
     """A client built with connect=True has its summary attributes populated."""
     ab_client = airbase.AirbaseClient(connect=True)
     populated = ("all_countries", "all_pollutants", "pollutants_per_country")
     for attr_name in populated:
         assert getattr(ab_client, attr_name) is not None