def test_mean_per_month():
    """Check ``mean_per_month`` against a hand-computed January 2015 mean.

    The expected value is the sum of the last column over every row whose
    ``DatetimeBegin`` contains ``'2015-01'``, divided by ``24 * 31``
    (presumably one reading per hour of January -- confirm against the data).
    """
    hourly = get_dataframe("../../data/main/cleaned/mean/Amsterdam_o3_2015.csv")
    # The measurement column is the last column of the file.
    measure_name = hourly.columns.values[-1]

    in_january = hourly['DatetimeBegin'].str.contains(
        '2015-01')
    january_total = hourly[in_january][measure_name].sum()
    expected_mean = january_total / (24 * 31)

    monthly = mean_per_month(hourly)
    first_month_row = monthly[monthly[DATE_NAME] == first_day]
    assert expected_mean.round(2) == first_month_row[measure_name].item()
def plot_mean_per_pol_city_year(city: CITY, pollutant: POLLUTANT, year: YEAR):
    """Show a line plot of the daily mean of one pollutant for a city and year.

    The input file is
    ``../../data/main/cleaned/mean/{city.name}_{pollutant.name}_{year.name}.csv``;
    it is reduced with ``mean_per_day`` before plotting.

    Args:
        city: City whose ``name`` selects the file and labels the plot.
        pollutant: Pollutant whose ``name`` selects the file and the y column.
        year: Year whose ``name`` selects the file and labels the plot.
    """
    csv_path = f'../../data/main/cleaned/mean/{city.name}_{pollutant.name}_{year.name}.csv'
    daily = mean_per_day(get_dataframe(csv_path))

    sns.set_style("whitegrid", {'grid.linestyle': '-'})
    plt.figure(figsize=(12, 6))

    line_axes = sns.lineplot(x="Date", y=f"mean {pollutant.name} (µg/m3)", data=daily)
    line_axes.set_title(f'{city.name} {pollutant.name} emissions {year.name}')
    plt.show()
def test_get_mean_frame():
    """Check the first row of ``get_mean_frame`` against a manual average.

    The manual value averages the ``Concentration`` recorded at
    ``2015-01-01 00:00:00 +01:00`` over every 2015 CSV found below the
    Amsterdam o3 directory.
    """
    data_dir = "../../data/main/cleaned/o3/Amsterdam/"
    year = YEAR['2015']
    first_hour = '2015-01-01 00:00:00 +01:00'

    mean_frame = get_mean_frame(concat_sets(data_dir, year), POLLUTANT.o3)
    actual = mean_frame.iloc[0]['mean o3 (µg/m3)']

    # Accumulate in file order with plain additions so the result can be
    # compared with exact equality below.
    running_sum = 0
    file_count = 0
    for csv_file in Path(data_dir).rglob('*' + year.name + '*.csv'):
        station = get_dataframe(csv_file)
        at_first_hour = station['DatetimeBegin'] == first_hour
        running_sum += station[at_first_hour]["Concentration"].item()
        file_count += 1

    assert actual == (running_sum / file_count)
def plot_mean_per_pol_city(city: CITY, pollutant: POLLUTANT, save=False):
    """Plot the monthly mean of one pollutant for one city over all found files.

    Every file matching ``{city.name}_{pollutant.name}_*.csv`` below
    ``../../data/main/cleaned/mean/`` is loaded, reduced with
    ``mean_per_month`` and combined into one frame that is drawn as a
    single line plot.

    Args:
        city: City whose ``name`` selects the input files and labels the plot.
        pollutant: Pollutant whose ``name`` selects the input files and the
            y column.
        save: When true, also write the figure to
            ``{city.name}_plot_{pollutant.name}.png`` in the working directory.
    """
    monthly_frames = []
    for path in Path("../../data/main/cleaned/mean/").rglob(
            f'{city.name}_{pollutant.name}_*.csv'):
        print(path.absolute())
        monthly_frames.append(mean_per_month(get_dataframe(path)))
    # Concatenate once rather than re-copying the accumulated frame per file;
    # fall back to an empty frame when nothing matched, as before.
    df = pd.concat(monthly_frames) if monthly_frames else pd.DataFrame()

    # The style has to be active before the axes are created, otherwise it
    # does not affect the axes used for this plot.
    sns.set_style("whitegrid", {'grid.linestyle': '-'})
    fig, axes = plt.subplots(figsize=(14, 6))
    sns.lineplot(x=DATE_NAME, y=f'mean {pollutant.name} (µg/m3)', data=df,
                 ax=axes)
    axes.set_title(f'{city.name} {pollutant.name} emissions')
    plt.tight_layout()

    # Save before showing so the file never depends on the state the figure
    # is left in once the display window has been dismissed.
    if save:
        fig.savefig(f'{city.name}_plot_{pollutant.name}.png')
    plt.show()
    # Release this figure explicitly instead of whichever one is "current".
    plt.close(fig)
def test_get_dataframe():
    """``get_dataframe`` loads ``csv_test_1.csv`` as 3 rows by 3 columns."""
    frame = emissions.get_dataframe("../../data/test/csv_test_1.csv")
    row_count = len(frame)
    assert row_count == 3
    column_count = len(frame.columns)
    assert column_count == 3
def test_get_dataframe_encoding():
    """``get_dataframe`` loads ``csv_test_2.csv`` as 3 rows by 3 columns.

    ``'iso-8859-1'`` is passed as the second argument (presumably the file
    encoding -- confirm against ``emissions.get_dataframe``).
    """
    frame = emissions.get_dataframe("../../data/test/csv_test_2.csv",
                                    'iso-8859-1')
    row_count = len(frame)
    assert row_count == 3
    column_count = len(frame.columns)
    assert column_count == 3
def get_stations_data(path: str) -> DataFrame:
    """Load the stations file at ``path`` via ``emissions.get_dataframe``.

    ``'iso-8859-1'`` is passed as the second argument (presumably the file
    encoding -- confirm against ``emissions.get_dataframe``).

    Args:
        path: Location of the file to load.

    Returns:
        Whatever ``emissions.get_dataframe`` returns for that file.
    """
    return emissions.get_dataframe(path, 'iso-8859-1')