def test_cez():
    """Check the values parsed from the saved ``samples/cez.html`` page."""
    sample = pathlib.Path('samples/cez.html')
    with sample.open() as handle:
        soup = BeautifulSoup(handle, 'html.parser')
    licence = parse.parse_page('výroba elektřiny', '110100146', soup)

    # Licence-level fields.
    assert licence.id == '110100146'
    assert licence.pocet_zdroju == 72

    # First capacity entry attached directly to the licence.
    licence_total = licence.vykony[0]
    assert licence_total.lic_id == '110100146'
    assert licence_total.druh == 'Elektrický'
    assert licence_total.technologie == 'Celkový'
    assert licence_total.mw == 9716.003

    # First facility: its entries at index 0 and index 3.
    first_facility = licence.provozovny[0]
    first_entry = first_facility.vykony[0]
    assert first_entry.technologie == 'Celkový'
    assert first_entry.druh == 'Elektrický'
    assert first_entry.mw == 7.3
    fourth_entry = first_facility.vykony[3]
    assert fourth_entry.technologie == 'Parní'
    assert fourth_entry.mw == 49.8
    assert first_facility.pocet_zdroju == 2

    # Last facility and its last capacity entry.
    last_facility = licence.provozovny[-1]
    assert last_facility.pocet_zdroju == 1
    assert last_facility.nazev == 'Elektrárna Ledvice IV B6'
    last_entry = last_facility.vykony[-1]
    assert last_entry.technologie == 'Parní'
    assert last_entry.mw == 1288.130
def get_data(
    business: str, lic_ids: List, url: str, start: int, end: int
) -> List[parse.Licence]:
    """Fetch and parse the licence pages for ``lic_ids[start:end]``.

    Each id is sent as the ``lic-id`` query parameter through
    ``parse.request_soup`` and the resulting soup is handed to
    ``parse.parse_page``.

    Args:
        business: Forwarded to ``parse.parse_page`` as its first argument
            and used in the error message.
        lic_ids: Licence ids; only the ``start:end`` slice is processed.
        url: Forwarded to ``parse.request_soup``.
        start: Slice start into ``lic_ids``.
        end: Slice end into ``lic_ids``.

    Returns:
        The parsed licences, in the order of the processed ids.

    Raises:
        SystemExit: With exit status 1 when requesting or parsing an id
            raises ``Error``; the failing id and its position in
            ``lic_ids`` are printed first.
    """
    count = 0
    lic_list = []
    # Slice a range of indices in parallel with the ids so the reported
    # position is the real one even when ids repeat or the bounds are
    # negative (``list.index`` would return the first occurrence only).
    positions = range(len(lic_ids))[start:end]
    for position, lic_id in zip(positions, lic_ids[start:end]):
        params = {'lic-id': lic_id}
        try:
            soup = parse.request_soup(url, count, params=params)
            parsed_lic = parse.parse_page(business, lic_id, soup)
        except Error as e:
            print(e)
            print(f'Error occured when parsing id {lic_id}')
            print(f'for {business} at index {position}')
            # Exit with a failing status and keep the original cause chained.
            raise SystemExit(1) from e
        lic_list.append(parsed_lic)
        count += 1
        if count % 500 == 0:
            # Progress report every 500 parsed licences.
            print(f'Parsed {count} licenses')
    return lic_list
def test_plzen():
    """Check facility capacities parsed from ``samples/plzen.html``."""
    sample = pathlib.Path('samples/plzen.html')
    with sample.open() as handle:
        soup = BeautifulSoup(handle, 'html.parser')
    licence = parse.parse_page('výroba elektřiny', '110100054', soup)

    # Only the first facility is inspected: entries at index 0 and index 3.
    facility = licence.provozovny[0]
    first_entry = facility.vykony[0]
    assert first_entry.technologie == 'Celkový'
    assert first_entry.druh == 'Elektrický'
    assert first_entry.mw == 90.0

    fourth_entry = facility.vykony[3]
    assert fourth_entry.technologie == 'Parní'
    assert fourth_entry.mw == 430.1
def test_plzen_export_to_csv():
    """Export the parsed plzen sample to CSV and check the written files.

    The export goes to ``tests/licenses/electricity``. Cleanup runs in a
    ``finally`` block so a failing assertion does not leave CSV files or
    directories behind for later test runs.
    """
    plzen_path = pathlib.Path('samples/plzen.html')
    with open(plzen_path) as f:
        bs = BeautifulSoup(f, 'html.parser')
    result = parse.parse_page('výroba elektřiny', '110100054', bs)

    # Export csvs within tests directory
    output_dir = pathlib.Path('tests/licenses/electricity/')
    sub_path = 'tests/licenses/electricity'
    try:
        result.to_csv(output_dir, 'licenses.csv')
        output_dir_list = list(output_dir.iterdir())

        # All four CSV files are expected in the output directory.
        assert pathlib.Path(f'{sub_path}/licenses.csv') in output_dir_list
        assert pathlib.Path(f'{sub_path}/capacities.csv') in output_dir_list
        assert pathlib.Path(f'{sub_path}/facilities.csv') in output_dir_list
        assert pathlib.Path(
            f'{sub_path}/facilities_capacities.csv') in output_dir_list

        with open(f'{sub_path}/facilities.csv') as csvf:
            data = list(csv.DictReader(csvf))
            assert len(data) == 1
            assert data[0]['id'] == '1'
            assert data[0]['lic_id'] == '110100054'
            assert data[0]['nazev'] == 'ELÚ 3'

        with open(f'{sub_path}/licenses.csv') as csvf:
            data = list(csv.DictReader(csvf))
            assert len(data) == 1
            assert data[0]['id'] == '110100054'
            assert data[0]['pocet_zdroju'] == '3'

        with open(f'{sub_path}/capacities.csv') as csvf:
            data = list(csv.DictReader(csvf))
            assert len(data) == 4
            assert data[0]['mw'] == '90.0'
            assert data[0]['druh'] == 'Elektrický'
            assert data[1]['mw'] == '430.1'
            assert data[1]['druh'] == 'Tepelný'
            assert data[2]['technologie'] == 'Parní'
    finally:
        # Cleanup: remove whatever was written, even after a failure.
        # Existence checks keep a partial export from masking the
        # original error with a FileNotFoundError.
        for csvfile in [
            'capacities.csv',
            'facilities.csv',
            'facilities_capacities.csv',
            'licenses.csv'
        ]:
            csv_path = pathlib.Path(f'{sub_path}/{csvfile}')
            if csv_path.exists():
                csv_path.unlink()
        for directory in (
            pathlib.Path(f'{sub_path}'),
            pathlib.Path('tests/licenses'),
        ):
            if directory.exists():
                directory.rmdir()
def test_oleska():
    """Check the values parsed from the saved ``samples/oleska.html`` page."""
    sample = pathlib.Path('samples/oleska.html')
    with sample.open() as handle:
        soup = BeautifulSoup(handle, 'html.parser')
    licence = parse.parse_page('výroba elektřiny', '110100010', soup)

    # Licence-level fields.
    assert licence.id == '110100010'
    assert len(licence.provozovny) == 1
    assert licence.pocet_zdroju == 2
    assert licence.vykony[0].mw == 0.013
    # No licence-level capacity entry may have druh 'Tepelný'.
    licence_kinds = [entry.druh for entry in licence.vykony]
    assert 'Tepelný' not in licence_kinds

    # The single facility and its first two capacity entries.
    facility = licence.provozovny[0]
    assert facility.kod_katastru == '670936'
    first_entry = facility.vykony[0]
    assert first_entry.technologie == 'Celkový'
    assert first_entry.druh == 'Elektrický'
    assert first_entry.mw == 0.013
    second_entry = facility.vykony[1]
    assert second_entry.technologie == 'Vodní'
    assert second_entry.mw == 0.013