def generate_match_accident_road_of_one_month(year: int, month: int) -> None:
    """Match one month of accidents with roads and write the result as parquet.

    The result is written to
    ``workdir + 'data/match_accident-road_{year}-{month}.parquet'``. When
    ``isdir`` reports that this path already exists, the month is treated as
    already generated and the function returns without doing any work.

    Args:
        year: Calendar year of the month to process.
        month: Calendar month to process (1-12).

    Raises:
        ValueError: If ``year``/``month`` do not form a valid date.
    """
    filepath = workdir + f'data/match_accident-road_{year}-{month}.parquet'
    if isdir(filepath):  # Skip if already done
        return
    print(f'Generating {year}-{month}')

    # Half-open interval [first day of the month, first day of the next
    # month). Computed before starting Spark so an invalid month fails
    # without ever creating a session.
    start_day = datetime.datetime(year, month, 1)
    if month == 12:
        end_day = datetime.datetime(year + 1, 1, 1)
    else:
        end_day = datetime.datetime(year, month + 1, 1)

    spark = init_spark()
    try:
        road_df = get_road_df(spark, use_cache=True)
        accident_df = preprocess_accidents(get_accident_df(spark))

        # Keep only the accidents whose 'date' falls inside the month.
        accident_df = accident_df.filter((col('date') >= start_day)
                                         & (col('date') < end_day))

        match_accident_road = match_accidents_with_roads(
            spark, road_df, accident_df, use_cache=False)

        match_accident_road.write.parquet(filepath)
    finally:
        # Always stop the session, even when a step above fails, so that a
        # failed month does not leak it.
        spark.stop()  # Force garbage collection and empty temp dir
#!/usr/bin/env python
from accident_prediction_montreal.accidents_montreal import get_accident_df
from accident_prediction_montreal.weather import get_weather_station_id_df
from accident_prediction_montreal.utils import init_spark
from accident_prediction_montreal.preprocess import preprocess_accidents

# Start the Spark session used by every step of this script.
spark = init_spark()

# Load the accidents, then run them through the preprocessing step.
accident_df = get_accident_df(spark)
accident_df = preprocess_accidents(accident_df)

# The return value is discarded; the call is presumably made for its side
# effects (e.g. building a cached result) -- TODO confirm.
get_weather_station_id_df(spark, accident_df)
def spark():
    """Return the Spark session created by ``init_spark``."""
    session = init_spark()
    return session