def load_winnings(): """open and read winnings data and load it into the database The winnings records contain address components as well as retailer ids but this will only use the retailer ids and will not load the addresses from the winnings """ root = '/home/bengolder/webapps/citydigits/citydigits/lottery/sample_data/' folder = os.path.join(root, 'xls') winfiles = [n for n in os.listdir(folder) if n[-4:] == '.xls'] winnings = [os.path.join(folder, w) for w in winfiles] for f in winnings: new_wins = xls_to_dicts(f, column_to_datetime='Date Won/Claimed') for win in new_wins: retailer_id = str(int(win['Ret #'])) try: retailer = Retailer.objects.get(retailer_id=retailer_id) except: retailer = None if retailer: print 'found at %s' % retailer date = win['Date Won/Claimed'] amount = win['Prize'] game = win['Game Name'] win_obj = Win() win_obj.retailer = retailer win_obj.date = date win_obj.amount = amount win_obj.game = game win_obj.save() print 'saved'
def build_graph():
    """Load the sheets of the graph workbook and populate the graph ``g``.

    Faculty and topic rows become nodes; affiliation and problem rows
    become edges, creating their target nodes on first sight.
    """
    folder = "/Users/benjamin/projects/mitdusp/data/"
    fname = "graph_data.xlsx"
    path = os.path.join(folder, fname)
    faculty = xls_to_dicts(path, "faculty")
    topics = xls_to_dicts(path, "topics")
    affiliations = xls_to_dicts(path, "affiliations")
    problems = xls_to_dicts(path, "problems")
    methods = xls_to_dicts(path, "methods")  # loaded but not yet used
    add_nodes(faculty)
    add_nodes(topics)
    # affiliation rows link a person ('name') to a program group
    for d in simpl(affiliations):
        if d['group'] not in g:
            g.add_node(d['group'], **{
                'name': d['group'],
                'type': 'program group',
            })
        g.add_edge(d['name'], d['group'], **{
            'level': d['level'],
        })
    # problem rows link a start node to a topic
    for d in simpl(problems):
        if d['target'] not in g:
            g.add_node(d['target'], **{
                'name': d['target'],
                'type': 'topic',
            })
        g.add_edge(d['start'], d['target'])
def repair_points(): """ What it should do now that I have corrections. if the address is not found: look in the list of not_found locations get the listed_address use that to look up the listed location treat it as found """ locations = read( 'filtered_ny_locations' ) sales = xls_to_dicts( sales_xls ) corrections = xls_to_dicts( corrections_xls ) for row in sales: add = row['address'] street_add = row['street_address'] # deal with the broken ones if add not in locations: # find the correction corrected = find_dict( street_add, corrections, 'sales_address' ) if not corrected: print add location = locations[corrected['listed_address']] else: location = locations[add] # make the point and location objects point = Point( row['lng'], row['lat'] ) loc = Location() loc.point = point # use the address information from sales, not from the retailer # listings, because these are the addresses that were geocoded loc.address_text = add loc.raw_address_text = location['address'] loc.street_address = street_add loc.city = row['city'] loc.state = 'NY' loc.zipcode = int(row['zipcode']) # save the new location object loc.save()
def repair_points(): """ What it should do now that I have corrections. if the address is not found: look in the list of not_found locations get the listed_address use that to look up the listed location treat it as found """ locations = read('filtered_ny_locations') sales = xls_to_dicts(sales_xls) corrections = xls_to_dicts(corrections_xls) for row in sales: add = row['address'] street_add = row['street_address'] # deal with the broken ones if add not in locations: # find the correction corrected = find_dict(street_add, corrections, 'sales_address') if not corrected: print add location = locations[corrected['listed_address']] else: location = locations[add] # make the point and location objects point = Point(row['lng'], row['lat']) loc = Location() loc.point = point # use the address information from sales, not from the retailer # listings, because these are the addresses that were geocoded loc.address_text = add loc.raw_address_text = location['address'] loc.street_address = street_add loc.city = row['city'] loc.state = 'NY' loc.zipcode = int(row['zipcode']) # save the new location object loc.save()
def newgraph(): fname = "projects3.xls" folder = "data" path = os.path.join(folder, fname) projects = xls_to_dicts(path, "projects") people = xls_to_dicts(path, "people") topics = xls_to_dicts(path, "topics") # add the nodes to the graph # be sure to construct ids for person in people: person['id'] = idify(person['name']) g.add_node(person['id'], **person) for topic in topics: topic['id'] = idify(topic['name']) g.add_node(topic['id'], **topic) for project in projects: p = project['name'] pcore = { 'name':project['name'], 'description':project['detail'], 'type':project['type'], } pcore['id'] = idify(p) if p not in g: g.add_node(pcore['id'], **pcore) for k in project: if idify(k) in g: if project[k] == 'x': # link the project to the topic g.add_edge(idify(k), idify(p)) # link the person to the topic g.add_edge(idify(k), idify(project['names'])) #print "linked %s to %s" % (k, p) if idify(project['names']) not in g: print "can't find", project["names"] else: g.add_edge(idify(project['names']), idify(p))
def load_points(): # load these into django models and save them """Run Third This compares the filtered addresses to the previously geocoded points, in order to determine the lat lng of each location. It simply records what was and was not found. Ater this step it is necessary to correct the addresses that did not match. The resulting corrections can be found in the file 'notfound_location_corrections.xls'. """ # locations are the listed locations locations = read( 'filtered_ny_locations' ) # sales are the sales locations sales = xls_to_dicts( sales_xls ) not_found = {} found = {} for row in sales: add = row['address'] if add in locations: print 'FOUND: %s' % add locations[add]['lat'] = row['lat'] locations[add]['lng'] = row['lng'] found[add] = locations[add] else: print 'NOT FOUND: %s' % add not_found[add] = { 'address': add, 'street_address':row['street_address'], 'city':row['city'], 'state':row['state'], 'zipcode':row['zipcode'], 'name':row['name'], } write( 'found_ny_locations', found ) write( 'notfound_ny_locations', not_found ) xloc = [locations[k] for k in locations] keys = xloc[0].keys() keys.extend( ['lat', 'lng'] ) xfound = [found[k] for k in found] xnot_found = [not_found[k] for k in not_found] xls( 'all_locations.xls', xloc, keys ) xls( 'found_ny_locations.xls', xfound ) xls( 'notfound_ny_locations.xls', xnot_found ) r = read( 'raw_ny_retailers' ) sellers = [r[k] for k in r] xls( 'retailers.xls', sellers )
def load_points(): # load these into django models and save them """Run Third This compares the filtered addresses to the previously geocoded points, in order to determine the lat lng of each location. It simply records what was and was not found. Ater this step it is necessary to correct the addresses that did not match. The resulting corrections can be found in the file 'notfound_location_corrections.xls'. """ # locations are the listed locations locations = read('filtered_ny_locations') # sales are the sales locations sales = xls_to_dicts(sales_xls) not_found = {} found = {} for row in sales: add = row['address'] if add in locations: print 'FOUND: %s' % add locations[add]['lat'] = row['lat'] locations[add]['lng'] = row['lng'] found[add] = locations[add] else: print 'NOT FOUND: %s' % add not_found[add] = { 'address': add, 'street_address': row['street_address'], 'city': row['city'], 'state': row['state'], 'zipcode': row['zipcode'], 'name': row['name'], } write('found_ny_locations', found) write('notfound_ny_locations', not_found) xloc = [locations[k] for k in locations] keys = xloc[0].keys() keys.extend(['lat', 'lng']) xfound = [found[k] for k in found] xnot_found = [not_found[k] for k in not_found] xls('all_locations.xls', xloc, keys) xls('found_ny_locations.xls', xfound) xls('notfound_ny_locations.xls', xnot_found) r = read('raw_ny_retailers') sellers = [r[k] for k in r] xls('retailers.xls', sellers)