def _fill_from_row(record, attribute_names, row):
    """Copy ``row[i]`` onto ``record.<attribute_names[i]>`` for every listed attribute."""
    for index, attribute_name in enumerate(attribute_names):
        # Indexing (rather than zip) keeps the IndexError on rows that are
        # shorter than expected instead of silently leaving attributes unset.
        setattr(record, attribute_name, row[index])


def read_source_file(file_name, locoation_file_name):
    """
    Load collision and location records from two csv files.

    Every data row of ``file_name`` becomes a ``Collision`` that is appended
    to the module-level ``collisions`` list; every data row of
    ``locoation_file_name`` becomes a ``Location`` that is appended to the
    module-level ``locations`` list. Columns are read by position and stored
    as the raw strings produced by the csv reader.

    A row is treated as a header (and skipped) when its first column contains
    ``COLLISION_ID`` / ``LOCATION_ID`` respectively. Blank rows are skipped.

    :param file_name: path of the collision csv file (at least 12 columns)
    :param locoation_file_name: path of the location csv file (at least 8
                                columns)
    :raises IndexError: if a non-blank data row has fewer columns than listed
                        above
    """
    # Column order of the collision csv. "impace_type" is spelled this way on
    # purpose: it is the attribute name the original code assigned.
    collision_columns = (
        "collision_id",
        "location_id",
        "hour_key",
        "environment",
        "light",
        "surface_condition",
        "traffic_control",
        "traffic_control_condition",
        "collision_classification",
        "impace_type",
        "no_of_pedestrians",
        "date",
    )
    # Column order of the location csv.
    location_columns = (
        "location_id",
        "street_name",
        "intersection_one",
        "intersection_two",
        "longitude",
        "latitude",
        "neighborhood",
        "closest_weather_station",
    )

    # The with-statement closes each file, so no explicit close() is needed.
    with open(file_name, 'r') as read_collision:
        print("Start to read file: " + file_name + ". This may take a while...")
        for row in csv.reader(read_collision):
            # csv.reader yields [] for blank lines; row[0] would fail on them.
            if not row or "COLLISION_ID" in row[0]:
                continue
            collision = Collision()
            _fill_from_row(collision, collision_columns, row)
            collisions.append(collision)

    with open(locoation_file_name, 'r') as read_location:
        print("Start to read file: " + locoation_file_name + ". This may take a while...")
        for row in csv.reader(read_location):
            if not row or "LOCATION_ID" in row[0]:
                continue
            location = Location()
            _fill_from_row(location, location_columns, row)
            locations.append(location)
def _split_location_string(location_str):
    """
    Split a raw location string into a street name and two intersections.

    The layout is chosen by the marker found in the string:
    ``<street>@<rest>``, ``<street>btwn<rest>`` or, when neither marker is
    present, ``<street>/<rest>``. ``<rest>`` is then split once on ``&`` or,
    if it has no ``&``, once on ``/``. No whitespace is stripped.

    :param location_str: the raw location text
    :return: ``(street_name, intersections)`` where ``intersections`` is a
             two-item list, or ``None`` when ``<rest>`` contains neither
             ``&`` nor ``/``
    :raises Exception: if the string contains both ``@`` and ``btwn``, or if
                       the marker check on ``<rest>`` fails
    :raises IndexError: if the string contains none of ``@``, ``btwn``, ``/``
    """
    has_at = "@" in location_str
    has_btwn = "btwn" in location_str
    if has_at and has_btwn:
        raise Exception(
            "Unexpected format: have both btwn and @: " + location_str)

    if has_at:
        marker = "@"
    elif has_btwn:
        marker = "btwn"
    else:
        marker = "/"

    pieces = location_str.split(marker, 1)
    street_name = pieces[0]
    # Only the plain "/" layout can lack its marker; that raises IndexError
    # here, exactly as the original code did.
    remainder = pieces[1]

    # "&" takes priority over "/". split(sep, 1) on a string known to contain
    # sep always yields exactly two parts.
    intersections = None
    for separator in ("&", "/"):
        if separator in remainder:
            intersections = remainder.split(separator, 1)
            break

    # Kept as in the original: on an unsplit remainder this is a substring
    # test, but once the remainder has been split it is a list-membership
    # test that only fires when a whole part is exactly "@" or "btwn".
    # NOTE(review): this was probably meant to test the unsplit remainder;
    # left unchanged so rows that load today keep loading.
    checked = remainder if intersections is None else intersections
    if "@" in checked or "btwn" in checked:
        raise Exception("Wrong location format " + location_str)

    return street_name, intersections


def _nearest_point(points, longitude, latitude):
    """
    Return the entry of *points* closest to the given coordinates.

    :param points: sequence of ``(label, longitude, latitude)`` tuples
    :param longitude: longitude to measure from
    :param latitude: latitude to measure from
    :return: the closest tuple (the first one on ties), or ``None`` when no
             entry is closer than infinity (e.g. *points* is empty)
    """
    closest = None
    shortest_dist = float("inf")
    for point in points:
        # Plain Euclidean distance on the raw longitude/latitude values.
        distance = math.sqrt(
            (longitude - point[1]) ** 2 + (latitude - point[2]) ** 2)
        if distance < shortest_dist:
            shortest_dist = distance
            closest = point
    return closest


def location_string_processor(file_name):
    """
    Retrieve data about the location of Ottawa's collision records from a
    csv file.

    For every data row (rows whose first column contains ``COLLISION_ID`` are
    treated as headers; blank rows are skipped) a ``Location`` is built:

    * column 1 is parsed into ``street_name`` and, when present,
      ``intersection_one`` / ``intersection_two``;
    * columns 2 and 3 are stored unchanged as ``longitude`` / ``latitude``
      and concatenated into ``location_id``;
    * ``neighborhood`` and ``closest_weather_station`` are set to the nearest
      entry returned by ``generate_neighborhood_lookup_table()`` and
      ``generate_weather_station_list()``.

    A running record count is written to stdout after each data row.

    :param file_name: the file to retrieve data
    :return: list of ``Location`` objects, in file order
    :raises Exception: if the location string is malformed, if the integer
                       part of the longitude is not -75 or -76, or if the
                       integer part of the latitude is not 45 or 44
    :raises IndexError: if a data row has too few columns, or its location
                        string has no ``@``, ``btwn`` or ``/``
    :raises ValueError: if a coordinate cannot be converted to ``float``
    """
    neighborhoods = generate_neighborhood_lookup_table()
    weather_stations = generate_weather_station_list()

    # Reference coordinates do not change per row, so read/convert them once
    # instead of once per row per candidate.
    neighborhood_points = [
        (neighborhood.neighborhood, neighborhood.longitude,
         neighborhood.latitude)
        for neighborhood in neighborhoods
    ]
    station_points = [
        # station[0] is the name, station[7] the longitude, station[6] the
        # latitude.
        (station[0], float(station[7]), float(station[6]))
        for station in weather_stations
    ]

    locations = []
    total_ctr = 0
    with open(file_name, 'r') as read_data:
        print("Start to read file: " + file_name + ". This may take a while...")
        for row in csv.reader(read_data):
            # csv.reader yields [] for blank lines; row[0] would fail on them.
            if not row or "COLLISION_ID" in row[0]:
                continue

            location = Location()
            total_ctr += 1
            sys.stdout.write(
                "\r" + str(total_ctr) + " records have been processed!")
            sys.stdout.flush()

            street_name, intersections = _split_location_string(row[1])
            location.street_name = street_name
            if intersections is not None:
                # Without "&" or "/" the intersections keep whatever values
                # Location() initialised them with.
                location.intersection_one = intersections[0]
                location.intersection_two = intersections[1]

            location.longitude = row[2]
            longitude = float(location.longitude)
            if int(longitude) not in (-75, -76):
                raise Exception("Wrong longitude: [" + location.longitude +
                                "], " + row[2])

            location.latitude = row[3]
            latitude = float(location.latitude)
            if int(latitude) not in (45, 44):
                raise Exception("Wrong latitude: " + row[3] + ", [" +
                                location.latitude + "]")

            location.location_id = str(location.longitude) + str(
                location.latitude)

            nearest = _nearest_point(neighborhood_points, longitude, latitude)
            if nearest is not None:
                location.neighborhood = nearest[0]

            nearest = _nearest_point(station_points, longitude, latitude)
            if nearest is not None:
                location.closest_weather_station = nearest[0]

            locations.append(location)
    return locations