def update_local_db():
    """Rebuild the local ``nh_data.issues`` collection and add missing streets.

    Steps, in order:

    1. Delete every document in ``issues``. This is a full refresh so that
       changes to previously downloaded issues are picked up.
    2. Fetch the issue columns with ``get_data(None)``, then look up
       neighborhoods (``get_neighborhood``) and street names
       (``get_street_name``) for them.
    3. Insert one ``issues`` document per fetched issue whose ``innhs`` entry
       is truthy, skipping ids that are already stored.
    4. For every street name present in ``issues`` but absent from
       ``streets``, call ``get_street_lengths`` and store the results in
       ``streets`` and ``streets_by_neighborhood``.

    Returns:
        None. The function works purely through database side effects and
        progress messages on stdout.
    """
    # Connect to the MongoDB server and select the database.
    client = MongoClient()
    db = client.nh_data

    # print(...) with a single argument behaves identically on Python 2 and 3.
    print("downloading new scf data...")

    # For now, remove all previous data and start over from the beginning so
    # as to account for changes in the data.  delete_many({}) is the
    # non-deprecated equivalent of the legacy Collection.remove().
    db.issues.delete_many({})

    # None requests a full download.  To save time, an incremental update
    # could instead pass the "created_at" of the newest stored issue, e.g.
    #   db.issues.find().sort("id", pymongo.DESCENDING).limit(1)
    # NOTE(review): the collection is emptied before the download starts, so
    # a failing get_data() leaves the database empty -- confirm this is
    # acceptable.
    latest_date = None

    (ids, repids, rtids, rtts, times, acks, statuses, addrs, lngs, lats,
     acknowledged_ats, closed_ats) = get_data(latest_date)

    print("getting neighborhoods...")
    innhs, hoods = get_neighborhood(lngs, lats)

    print("getting street_names...")
    street_names = get_street_name(addrs)

    print("inserting issues into database...")
    for i, issue_id in enumerate(ids):
        # presumably innhs[i] says whether the issue lies inside a known
        # neighborhood -- verify against get_neighborhood.
        if not innhs[i]:
            continue
        # Guards against the same id appearing twice in the fetched data.
        if db.issues.find_one({"id": issue_id}):
            continue
        db.issues.insert_one({
            "id": issue_id,
            "repid": repids[i],
            "rtid": rtids[i],
            "rtt": rtts[i],
            "created_at": times[i],
            "acknowledged": acks[i],
            "status": statuses[i],
            "address": addrs[i],
            "lng": lngs[i],
            "lat": lats[i],
            "neighborhood": hoods[i],
            "street_name": street_names[i],
            "acknowledged_at": acknowledged_ats[i],
            "closed_at": closed_ats[i],
        })

    print("getting street info...")
    # Get street data.  To save time, only calculate street length for new
    # streets.  A set gives constant-time membership tests, and adding each
    # name as it is seen keeps the request free of duplicates while
    # preserving first-seen order.
    seen_street_names = set(street["name"] for street in db.streets.find())

    print("getting new street lengths...")
    unused_street_names = []
    for issue in db.issues.find():
        street_name = issue["street_name"]
        if street_name not in seen_street_names:
            seen_street_names.add(street_name)
            unused_street_names.append(street_name)

    (new_street_names, new_street_lengths,
     new_street_lengths_by_neighborhood) = get_street_lengths(
         unused_street_names)

    for i, street_name in enumerate(new_street_names):
        db.streets.insert_one({
            "name": street_name,
            "length": new_street_lengths[i],
        })
        # One document per (street, neighborhood) pair.
        for hood, length in new_street_lengths_by_neighborhood[i].items():
            db.streets_by_neighborhood.insert_one({
                "name": street_name,
                "neighborhood": hood,
                "length": length,
            })
def get_streets(city):
    """Assign a ``street_id`` to every unprocessed issue of ``city``.

    Issues of the city whose ``street_id`` is ``-1`` are treated as
    unprocessed.  For each of them the street name is derived from the
    issue's address with ``get_street_name``:

    * if a street with that name already exists for the city, the issue is
      pointed at it right away;
    * otherwise the name is collected, and all collected names are resolved
      with a single ``get_street_lengths`` call.  Names found in the result
      are inserted into ``streets`` with a fresh id; issues on names that
      were not found get ``street_id`` ``0``.

    Args:
        city: mapping with at least an ``"id"`` key; it is also passed
            through unchanged to ``get_street_lengths``.

    Returns:
        None. All results are written to the ``scf_data`` database.
    """
    # print(...) with a single argument behaves identically on Python 2 and 3.
    print("getting streets...")

    # Connect to the MongoDB server and select the database.
    client = MongoClient()
    db = client.scf_data

    # Names of streets not yet in the database, in first-seen order, plus
    # the ids of the issues located on each of them.  New names are stored
    # and processed all at once to save time (parsing the xml file takes
    # forever...).
    new_street_names = []
    issue_ids_by_street = {}

    print(".......getting street names...")
    for issue in db.issues.find({"city_id": city["id"]}):
        if issue["street_id"] != -1:
            continue  # already processed
        street_name = get_street_name(issue["address"])
        street = db.streets.find_one({
            "city_id": city["id"],
            "name": street_name})
        if street:
            db.issues.update_one(
                {"id": issue["id"]},
                {"$set": {"street_id": street["id"]}})
            continue
        if street_name not in issue_ids_by_street:
            new_street_names.append(street_name)
            issue_ids_by_street[street_name] = []
        issue_ids_by_street[street_name].append(issue["id"])

    # street_lengths is a dictionary, with keys as the name of each street
    # that was found, and values representing the length of each street.
    street_lengths = get_street_lengths(new_street_names, city)

    # Street ids continue after the highest id currently stored (starting at
    # 1 for an empty collection).  Looking this up once and counting locally
    # replaces a count + sorted query per inserted street.
    latest_street = db.streets.find_one(sort=[("id", pymongo.DESCENDING)])
    next_street_id = latest_street["id"] + 1 if latest_street else 1

    for street_name in new_street_names:
        if street_name in street_lengths:
            street_id = next_street_id
            next_street_id += 1
            db.streets.insert_one({
                "id": street_id,
                "name": street_name,
                "city_id": city["id"],
                "length": street_lengths[street_name]})
        else:
            # 0 marks issues whose street could not be resolved.
            street_id = 0
        # Every issue on this street, first occurrence and copies alike,
        # receives the same id.
        for issue_id in issue_ids_by_street[street_name]:
            db.issues.update_one(
                {"id": issue_id},
                {"$set": {"street_id": street_id}})
from get_street_name import get_street_name
from pymongo import MongoClient

# Manual smoke test: run get_street_name on the addresses of up to 100
# stored issues and print the result.
client = MongoClient()
db = client.nh4

# limit(100) caps the sample on the server side and, unlike calling next()
# exactly 100 times, does not raise StopIteration when the collection holds
# fewer than 100 issues.
issues = db.issues.find().limit(100)
addrs = [issue["address"] for issue in issues]

street_names = get_street_name(addrs)
# print(...) with a single argument behaves identically on Python 2 and 3.
print(street_names)