def get_data(years=range(2001, 2015), replacements=None, recalculate=False):
    """Return the complete "terrenos" dataset, building the CSV cache if needed.

    The dataset is cached as ``terrenos_completo.csv`` inside the directory
    returned by ``get_data_dir("terrenos")``. When that file is missing, or
    when ``recalculate`` is true, the dataset is rebuilt with
    ``create_clean_dataset``, passed through ``intersect_data_with_shps`` for
    the "fracciones_caba_censo_2010" and "radios_censo_2010" divisions, and
    written back to the cache.

    Args:
        years: Years forwarded to ``create_clean_dataset``.
        replacements: Mapping forwarded to ``create_clean_dataset``. ``None``
            (the default) means an empty dict.
        recalculate: When true, rebuild the dataset even if the cache exists.

    Returns:
        The dataset as a DataFrame. When it is read from the cache, the
        "Unnamed: 0" column (the index written by ``to_csv``) is dropped.
    """
    # A fresh dict per call: a literal ``{}`` default would be one shared
    # object that a callee could mutate across calls.
    if replacements is None:
        replacements = {}

    path = os.path.join(get_data_dir("terrenos"), "terrenos_completo.csv")

    if not os.path.isfile(path) or recalculate:
        df = create_clean_dataset(years, replacements)
        # The return values are ignored, so these calls presumably update
        # ``df`` in place -- TODO confirm against intersect_data_with_shps.
        intersect_data_with_shps(
            df, get_division_path("fracciones_caba_censo_2010"))
        intersect_data_with_shps(df, get_division_path("radios_censo_2010"))
        df.to_csv(path, encoding="utf-8")
        return df

    df = pd.read_csv(path, encoding="utf-8")
    # ``axis`` is passed by keyword: recent pandas versions no longer accept
    # it positionally.
    return df.drop("Unnamed: 0", axis=1)
def main(area_level="RADIO", limit=10000, field_name="reach_area",
         transport_shp_name="recorridos_de_colectivos"):
    """Compute the reachable-surface indicator for the pending division shapes.

    Loads the indicators dataframe for ``area_level``, the transport lines
    shapefile and the division shapefile. Every division shape whose id does
    not yet have a non-null ``field_name`` value is processed: the lines
    intersecting it are found, ``calc_reachable_surface_and_people`` is
    evaluated and the result is stored with ``add_value``. The dataframe is
    saved with ``save_indicators_df`` when the loop ends, including when it
    ends early because of ``limit`` or Ctrl-C.

    Args:
        area_level: Key into ``AREA_LEVEL_SHP_NAME`` selecting the division
            shapefile; also passed to ``get_indicators_df``.
        limit: Processing stops once the loop counter reaches this value. The
            counter advances for every pending shape, including the ones
            skipped because their id is not in the new index.
        field_name: Indicator column used to detect already computed shapes.
        transport_shp_name: Name of the transport lines shapefile.
    """
    df, old_index, new_index = get_indicators_df(area_level)
    # A set keeps the "already computed?" check O(1) per shape.
    already_done = set(df[pd.notnull(df[field_name])].index)

    # get bus lines
    path_bus = get_transport_shp_path(transport_shp_name)
    sf_lines = shapefile.Reader(path_bus)
    lines = get_shapely_shapes(sf_lines)

    # get division shapes
    path_divisions = get_division_path(AREA_LEVEL_SHP_NAME[area_level])
    sf_polys = shapefile.Reader(path_divisions)
    polys = get_shapely_shapes(sf_polys)
    ids = [record[0] for record in sf_polys.records()]
    area_level_shapes = {
        id_p: polygon for id_p, polygon in zip(ids, polys)
        if id_p not in already_done
    }

    # iterate division shapes calculating the indicator
    start = time.time()
    total_shapes = len(area_level_shapes)
    progress_status = "Nothing done."
    # items() works on Python 2 and 3; iteritems() exists only on Python 2.
    for i, (id_shape, shape) in enumerate(area_level_shapes.items(), 1):

        # only add new value if the shape is in the index
        if id_shape in new_index:
            try:
                lines_intersect = intersecting_lines(shape, lines)
                surface = calc_reachable_surface_and_people(
                    shape, lines_intersect)
                add_value(id_shape, surface, df)

                # timings are in minutes
                elapsed = (time.time() - start) / 60.0
                average = elapsed / i
                prediction = (total_shapes - i) * average

                progress_status = """ {} {}/{} in {:.2f} mins. Average: {:.2f} Time to end: {:.2f} """.format(
                    id_shape.ljust(10), i, total_shapes, elapsed, average,
                    prediction).strip()
                # carriage returns make the next status overwrite this one
                print(progress_status, end="\r" * len(progress_status))

                if i >= limit:
                    print(progress_status)
                    print("Limit of", limit, "shapes reached.")
                    break

            except KeyboardInterrupt:
                # leave the loop so the values computed so far get saved
                print(progress_status)
                print("Interrupted!")
                break

    print("Saving values...", end=" ")
    save_indicators_df(df, old_index)
    print("Done.")
def main(area_level="RADIO", limit=10000, field_name="reach_area",
         transport_shp_name="recorridos_de_colectivos"):
    """Compute the reachable-surface indicator for the pending division shapes.

    Loads the indicators dataframe for ``area_level``, the transport lines
    shapefile and the division shapefile. Every division shape whose id does
    not yet have a non-null ``field_name`` value is processed: the lines
    intersecting it are found, ``calc_reachable_surface_and_people`` is
    evaluated and the result is stored with ``add_value``. The dataframe is
    saved with ``save_indicators_df`` when the loop ends, including when it
    ends early because of ``limit`` or Ctrl-C.

    Args:
        area_level: Key into ``AREA_LEVEL_SHP_NAME`` selecting the division
            shapefile; also passed to ``get_indicators_df``.
        limit: Processing stops once the loop counter reaches this value. The
            counter advances for every pending shape, including the ones
            skipped because their id is not in the new index.
        field_name: Indicator column used to detect already computed shapes.
        transport_shp_name: Name of the transport lines shapefile.
    """
    df, old_index, new_index = get_indicators_df(area_level)
    # A set keeps the "already computed?" check O(1) per shape.
    already_done = set(df[pd.notnull(df[field_name])].index)

    # get bus lines
    path_bus = get_transport_shp_path(transport_shp_name)
    sf_lines = shapefile.Reader(path_bus)
    lines = get_shapely_shapes(sf_lines)

    # get division shapes
    path_divisions = get_division_path(AREA_LEVEL_SHP_NAME[area_level])
    sf_polys = shapefile.Reader(path_divisions)
    polys = get_shapely_shapes(sf_polys)
    ids = [record[0] for record in sf_polys.records()]
    area_level_shapes = {id_p: polygon for id_p, polygon in zip(ids, polys)
                         if id_p not in already_done}

    # iterate division shapes calculating the indicator
    start = time.time()
    total_shapes = len(area_level_shapes)
    progress_status = "Nothing done."
    # items() works on Python 2 and 3; iteritems() exists only on Python 2.
    for i, (id_shape, shape) in enumerate(area_level_shapes.items(), 1):

        # only add new value if the shape is in the index
        if id_shape in new_index:
            try:
                lines_intersect = intersecting_lines(shape, lines)
                surface = calc_reachable_surface_and_people(shape,
                                                            lines_intersect)
                add_value(id_shape, surface, df)

                # timings are in minutes
                elapsed = (time.time() - start) / 60.0
                average = elapsed / i
                prediction = (total_shapes - i) * average

                progress_status = """ {} {}/{} in {:.2f} mins. Average: {:.2f} Time to end: {:.2f} """.format(
                    id_shape.ljust(10), i, total_shapes, elapsed, average,
                    prediction).strip()
                # carriage returns make the next status overwrite this one
                print(progress_status, end="\r" * len(progress_status))

                if i >= limit:
                    print(progress_status)
                    print("Limit of", limit, "shapes reached.")
                    break

            except KeyboardInterrupt:
                # leave the loop so the values computed so far get saved
                print(progress_status)
                print("Interrupted!")
                break

    print("Saving values...", end=" ")
    save_indicators_df(df, old_index)
    print("Done.")
def find_containing_radios(df):
    """Apply ``find_containing_shape`` to every row of ``df``.

    Reads the "radios_censo_2010" division shapefile and groups its shapes
    in a two-level dict, ``{comuna: {radio_id: shape}}``. The comuna is the
    integer before the first "_" of the radio id (first field of each
    record). Only comunas present in ``df["comuna"]`` get a bucket, so a
    radio belonging to any other comuna raises ``KeyError``.

    Args:
        df: DataFrame with a "comuna" column whose values convert to int.

    Returns:
        The result of ``df.apply(find_containing_shape, axis=1, ...)``, where
        each call also receives the grouped shapes and the shapefile path.
    """
    shp_path = get_division_path("radios_censo_2010")
    reader = shapefile.Reader(shp_path)

    # one empty bucket per comuna found in the data
    radios_dict = {}
    for comuna_value in df["comuna"].unique():
        radios_dict[int(comuna_value)] = {}

    for shape_record in reader.iterShapeRecords():
        polygon = get_shapely_shape(shape_record.shape)
        radio_id = shape_record.record[0]
        comuna_id = int(radio_id.split("_")[0])
        bucket = radios_dict[comuna_id]
        bucket[radio_id] = polygon

    extra_args = (radios_dict, shp_path)
    return df.apply(find_containing_shape, axis=1, args=extra_args)
def _prepare_test_case():
    """Build a fixed test fixture: one census radio plus all the bus lines.

    Returns:
        A ``(radio, lines)`` tuple. ``radio`` is the shape stored under id
        "14_1_8" in the "radios_censo_2010" division shapefile, and ``lines``
        holds the shapes read from the "recorrido-colectivos" transport
        shapefile.
    """
    bus_path = get_transport_shp_path("recorrido-colectivos")
    radios_path = get_division_path("radios_censo_2010")

    lines = get_shapely_shapes(shapefile.Reader(bus_path))

    radios_reader = shapefile.Reader(radios_path)
    polygons = get_shapely_shapes(radios_reader)
    # the first field of every record is the radio id
    radio_ids = [record[0] for record in radios_reader.records()]
    radios_by_id = dict(zip(radio_ids, polygons))

    return radios_by_id["14_1_8"], lines