def _credit_neighborhoods(totals, current_hood, last_hood, distance):
    """Add one node-to-node distance to the per-neighborhood totals in place.

    A hood of "" marks a node without a neighborhood. When both end nodes
    share a neighborhood it receives the whole distance; otherwise each end
    node that has a neighborhood receives half of it.
    """
    if current_hood == last_hood:
        if current_hood != "":
            totals[current_hood] = totals.get(current_hood, 0.0) + distance
        return
    for hood in (current_hood, last_hood):
        if hood != "":
            totals[hood] = totals.get(hood, 0.0) + distance / 2.0


def get_street_lengths(street_names):
    """Measure the requested streets using the ways in data/new_haven.xml.

    A street's length is the sum of the distances between consecutive nodes
    of every way carrying that street's name. A node pair counts fully
    when both nodes are flagged by get_neighborhood, and by half when only
    one is. The same distances are also split per neighborhood.

    Args:
        street_names: iterable of street names to measure. Names that match
            no way in the data are simply absent from the result.

    Returns:
        Three parallel lists ``(names, lengths, lengths_by_neighborhood)``:
        the street names found, each street's length in miles, and for each
        street a dict mapping neighborhood name to length in miles.

    Raises:
        KeyError: if a way references a node id that is not a top-level
            ``node`` element of the data file.
    """
    wanted_names = set(street_names)

    with open("data/new_haven.xml") as xml_file:
        data = etree.parse(xml_file)

    ## first, go through the data for each way,
    ## and extract the ids for each node involved.
    print(" getting street nodes...")
    street_nodes = {}
    for way in data.iter("way"):
        name_tags = way.findall("./tag[@k='name'].")
        if not name_tags:
            continue
        name = name_tags[0].attrib["v"]
        if name not in wanted_names:
            continue
        refs = [nd.attrib["ref"] for nd in way.iter("nd")]
        street_nodes.setdefault(name, []).append(refs)

    print(" getting street coords...")
    ## Index the top-level nodes by id once, so each reference is a dict
    ## lookup instead of a scan over the whole document. setdefault keeps
    ## the first node seen for an id.
    nodes_by_id = {}
    for node in data.findall("./node"):
        nodes_by_id.setdefault(node.get("id"), node)

    ## street_coords[name][segment][node] is a (lon, lat) pair of floats.
    street_coords = {}
    for name, segments in street_nodes.items():
        street_coords[name] = []
        for refs in segments:
            coords = []
            for ref in refs:
                node = nodes_by_id[ref]
                coords.append((float(node.attrib["lon"]),
                               float(node.attrib["lat"])))
            street_coords[name].append(coords)

    print(" getting street hoods...")
    ### determine the neighborhood and whether or not a node is in New Haven
    ### for each node.
    ## NOTE(review): get_neighborhood is defined elsewhere; it is used here
    ## as returning one truthy/falsy flag and one neighborhood name ("" for
    ## none) per input coordinate -- confirm against its definition.
    street_innh = {}
    street_hood = {}
    for name, segments in street_coords.items():
        street_innh[name] = []
        street_hood[name] = []
        for coords in segments:
            lngs = [lon for lon, _ in coords]
            lats = [lat for _, lat in coords]
            innhs, hoods = get_neighborhood(lngs, lats)
            street_innh[name].append(innhs)
            street_hood[name].append(hoods)

    print(" getting street lengths...")
    ## finally, we calculate the street length by adding
    ## the length between each pair of consecutive nodes
    ## in each segment, and adding the length of each segment,
    ## only considering those nodes within new haven.
    names = []
    lengths = []
    lengths_by_neighborhood = []
    for name, segments in street_coords.items():
        # todo: possible conditional here to eliminate street lengths of 0
        total = 0
        hood_totals = {}
        for seg_index, coords in enumerate(segments):
            innhs = street_innh[name][seg_index]
            hoods = street_hood[name][seg_index]
            for j in range(1, len(coords)):
                lon, lat = coords[j]
                last_lon, last_lat = coords[j - 1]
                ## geopy reads a 2-tuple as (latitude, longitude), so the
                ## stored (lon, lat) pairs must be swapped here.
                ## NOTE: vincenty was removed in geopy 2.0 in favour of
                ## geopy.distance.geodesic.
                distance_between_nodes = geopy.distance.vincenty(
                    (lat, lon), (last_lat, last_lon)
                ).miles

                ### find the length of the street strictly within new haven...
                current_node_innh = innhs[j]
                last_node_innh = innhs[j - 1]
                if current_node_innh and last_node_innh:
                    total += distance_between_nodes
                elif current_node_innh or last_node_innh:
                    total += distance_between_nodes / 2.0

                ### find the length of the street within each neighborhood...
                _credit_neighborhoods(
                    hood_totals, hoods[j], hoods[j - 1], distance_between_nodes
                )
        names.append(name)
        lengths.append(total)
        lengths_by_neighborhood.append(hood_totals)

    return names, lengths, lengths_by_neighborhood
def _reload_issues(db):
    """Empty the issues collection and refill it from a fresh download.

    Only issues whose in-New-Haven flag from get_neighborhood is truthy are
    stored, and an issue id already present in the collection is not
    inserted again.
    """
    print("downloading new scf data...")
    ## for now, remove all previous data and start over from the
    ## beginning so as to account for changes in the data.
    ## To save time with an incremental update instead, latest_date could be
    ## set to the "created_at" of the stored issue with the highest "id".
    db.issues.delete_many({})
    latest_date = None

    (ids, repids, rtids, rtts, times, acks, statuses, addrs, lngs, lats,
     acknowledged_ats, closed_ats) = get_data(latest_date)

    print("getting neighborhoods...")
    innhs, hoods = get_neighborhood(lngs, lats)

    print("getting street_names...")
    street_names = get_street_name(addrs)

    print("inserting issues into database...")
    for i, issue_id in enumerate(ids):
        if not innhs[i]:
            continue
        if db.issues.find_one({"id": issue_id}):
            continue
        db.issues.insert_one({
            "id": issue_id,
            "repid": repids[i],
            "rtid": rtids[i],
            "rtt": rtts[i],
            "created_at": times[i],
            "acknowledged": acks[i],
            "status": statuses[i],
            "address": addrs[i],
            "lng": lngs[i],
            "lat": lats[i],
            "neighborhood": hoods[i],
            "street_name": street_names[i],
            "acknowledged_at": acknowledged_ats[i],
            "closed_at": closed_ats[i],
        })


def _add_missing_streets(db):
    """Store lengths for streets that appear in issues but not yet in streets."""
    print("getting street info...")
    ## get street data. To save time, only calculate street length for new streets
    all_street_names = [issue["street_name"] for issue in db.issues.find()]
    existing_street_names = set(street["name"] for street in db.streets.find())

    print("getting new street lengths...")
    ## Keep each unseen name once, in first-seen order; set membership avoids
    ## rescanning a list for every issue.
    unused_street_names = []
    seen = set(existing_street_names)
    for street_name in all_street_names:
        if street_name not in seen:
            seen.add(street_name)
            unused_street_names.append(street_name)

    new_street_names, new_street_lengths, new_street_lengths_by_neighborhood = (
        get_street_lengths(unused_street_names)
    )
    for name, length, hood_lengths in zip(
            new_street_names, new_street_lengths,
            new_street_lengths_by_neighborhood):
        db.streets.insert_one({"name": name, "length": length})
        for hood, hood_length in hood_lengths.items():
            db.streets_by_neighborhood.insert_one({
                "name": name,
                "neighborhood": hood,
                "length": hood_length,
            })


def update_local_db():
    """Rebuild the issue data in the local ``nh_data`` MongoDB database.

    Connects to the default MongoDB server, replaces the contents of the
    ``issues`` collection with a fresh download, then adds entries to
    ``streets`` and ``streets_by_neighborhood`` for every street named in an
    issue that has no ``streets`` entry yet. The client connection is
    closed when the update finishes or fails.
    """
    # connect to mongodb server and create database...
    client = MongoClient()
    try:
        db = client.nh_data
        _reload_issues(db)
        _add_missing_streets(db)
    finally:
        client.close()