def validate_ids_unique(data):
    """Check that no two rows in ``data`` carry the same ``"id"`` value.

    Rows are visited in order and each ``row["id"]`` is remembered. Whenever
    an id is seen a second time, the offending row is printed and
    ``critical_exit`` is called with a message naming the duplicate id
    (presumably terminating the program -- ``critical_exit`` is defined
    elsewhere).

    Args:
        data: Iterable of mappings; every row must have an ``"id"`` key whose
            value is hashable.

    Raises:
        KeyError: If a row has no ``"id"`` key.
    """
    known_ids = set()
    for entry in data:
        entry_id = entry["id"]
        if entry_id not in known_ids:
            known_ids.add(entry_id)
            continue
        # Repeated id: show the full row for context before bailing out.
        print(entry)
        critical_exit(f"Duplicate id found {entry_id}")
def add_missing_ids(data):
    """Fill in an ``"id"`` for every row that lacks one and return ``data``.

    A row is given a new id (from ``gen_id``) when it has no ``"id"`` key or
    when its ``"id"`` value has length zero; the new id is printed. Each row
    is then checked for a ``"name"`` key: if it is absent the row is printed
    and ``data_builder.critical_exit`` is called.

    Args:
        data: Iterable of mutable mappings (rows). Rows are modified in place.

    Returns:
        The same ``data`` object that was passed in.
    """
    for entry in data:
        needs_id = "id" not in entry or len(entry["id"]) == 0
        if needs_id:
            entry["id"] = gen_id(entry)
            print("Added id: " + entry["id"])

        if "name" in entry:
            continue
        # A row without a name is treated as malformed input.
        print(entry)
        data_builder.critical_exit(
            "this row is broken with no name? (missing ###):"
        )
    return data
def add_missing_ids():
    """Load all rows, assign ids where missing, validate, and write back.

    Rows come from ``data_builder.read_all_data()``. A row is given a new id
    (from ``gen_id``) when it has no ``"id"`` key or when its ``"id"`` value
    has length zero; the new id is printed. A row with no ``"name"`` key is
    printed and reported through ``critical_exit``. After the loop, the rows
    are passed to ``validate_ids_unique`` and then to ``rewrite_data``.
    """
    rows = data_builder.read_all_data()

    for entry in rows:
        needs_id = "id" not in entry or len(entry["id"]) == 0
        if needs_id:
            entry["id"] = gen_id(entry)
            print("Added id: " + entry["id"])

        if "name" in entry:
            continue
        # A row without a name is treated as malformed input.
        print(entry)
        critical_exit("this row is broken with no name? (missing ###):")

    validate_ids_unique(rows)
    rewrite_data(rows)
def gen_id(row):
    """Build an id of the form ``<state>-<city>-<suffix>`` for ``row``.

    The state part is ``us_state_to_abbrev[row['state']]`` lower-cased. The
    city part is ``row['city']`` with spaces and periods removed and
    lower-cased. The suffix is whatever ``random_chars(4)`` returns.

    When the city part comes out empty, it is replaced by
    ``unknown_location_acronym`` if the state part equals that acronym, or by
    ``'dc'`` if the state part is ``'dc'``; for any other state
    ``critical_exit`` is called.

    Args:
        row: Mapping with ``'state'`` and ``'city'`` keys.

    Returns:
        The generated id string.
    """
    state_code = us_state_to_abbrev[row['state']].lower()
    city_code = row['city'].replace(' ', '').replace('.', '').lower()

    if not city_code:
        # A blank city is only tolerated for the unknown-location
        # placeholder and for DC; the state code doubles as the city code.
        if state_code == unknown_location_acronym:
            city_code = unknown_location_acronym
        elif state_code == 'dc':
            city_code = 'dc'
        else:
            critical_exit("invalid city abbreviation, exiting")

    suffix = random_chars(4)
    return f"{state_code}-{city_code}-{suffix}"
def gen_id(row):
    """Return a ``<state>-<city>-<suffix>`` id string derived from ``row``.

    ``<state>`` is the lower-cased ``us_state_to_abbrev`` entry for
    ``row["state"]``. ``<city>`` is ``row["city"]`` lower-cased with spaces
    and periods stripped out. ``<suffix>`` is the result of
    ``random_chars(4)``.

    An empty ``<city>`` falls back to ``unknown_location_acronym`` when the
    state part equals that acronym, or to ``"dc"`` when the state part is
    ``"dc"``. Any other state with an empty city is reported through
    ``data_builder.critical_exit``.

    Args:
        row: Mapping providing ``"state"`` and ``"city"``.

    Returns:
        The generated id string.
    """
    state_abbrev = us_state_to_abbrev[row["state"]].lower()

    city_abbrev = row["city"]
    for unwanted in (" ", "."):
        city_abbrev = city_abbrev.replace(unwanted, "")
    city_abbrev = city_abbrev.lower()

    city_missing = len(city_abbrev) == 0
    if city_missing and state_abbrev == unknown_location_acronym:
        city_abbrev = unknown_location_acronym
    elif city_missing and state_abbrev == "dc":
        city_abbrev = "dc"
    elif city_missing:
        # No city and no recognised fallback for this state.
        data_builder.critical_exit("invalid city abbreviation, exiting")

    return f"{state_abbrev}-{city_abbrev}-{random_chars(4)}"