Exemple #1
0
def load_winnings(root=None):
    """Open and read winnings data and load it into the database.

    Every ``.xls`` file in the ``xls`` sub-folder of *root* is read with
    ``xls_to_dicts``.  For each row whose 'Ret #' matches a stored Retailer,
    a Win is saved with that retailer and the row's 'Date Won/Claimed',
    'Prize' and 'Game Name' values.

    The winnings records contain address components as well as retailer ids,
    but this only uses the retailer ids and does not load the addresses
    from the winnings.

    root -- folder that contains the ``xls`` sub-folder.  When omitted, the
        original hard-coded sample_data location is used.
    """
    if root is None:
        root = '/home/bengolder/webapps/citydigits/citydigits/lottery/sample_data/'
    folder = os.path.join(root, 'xls')
    for name in os.listdir(folder):
        if not name.endswith('.xls'):
            continue
        path = os.path.join(folder, name)
        new_wins = xls_to_dicts(path, column_to_datetime='Date Won/Claimed')
        for win in new_wins:
            # int() then str() so a numeric cell such as 1234.0 becomes '1234'
            retailer_id = str(int(win['Ret #']))
            try:
                retailer = Retailer.objects.get(retailer_id=retailer_id)
            except (Retailer.DoesNotExist, Retailer.MultipleObjectsReturned):
                # No single matching retailer: skip the row, as before.
                # Other errors (database failures, Ctrl-C) now propagate
                # instead of being silently treated as "not found".
                continue
            print('found at %s' % retailer)
            win_obj = Win()
            win_obj.retailer = retailer
            win_obj.date = win['Date Won/Claimed']
            win_obj.amount = win['Prize']
            win_obj.game = win['Game Name']
            win_obj.save()
            print('saved')
Exemple #2
0
def load_winnings(root=None):
    """Open and read winnings data and load it into the database.

    Every ``.xls`` file in the ``xls`` sub-folder of *root* is read with
    ``xls_to_dicts``.  For each row whose 'Ret #' matches a stored Retailer,
    a Win is saved with that retailer and the row's 'Date Won/Claimed',
    'Prize' and 'Game Name' values.

    The winnings records contain address components as well as retailer ids,
    but this only uses the retailer ids and does not load the addresses
    from the winnings.

    root -- folder that contains the ``xls`` sub-folder.  When omitted, the
        original hard-coded sample_data location is used.
    """
    if root is None:
        root = '/home/bengolder/webapps/citydigits/citydigits/lottery/sample_data/'
    folder = os.path.join(root, 'xls')
    for name in os.listdir(folder):
        if not name.endswith('.xls'):
            continue
        path = os.path.join(folder, name)
        new_wins = xls_to_dicts(path, column_to_datetime='Date Won/Claimed')
        for win in new_wins:
            # int() then str() so a numeric cell such as 1234.0 becomes '1234'
            retailer_id = str(int(win['Ret #']))
            try:
                retailer = Retailer.objects.get(retailer_id=retailer_id)
            except (Retailer.DoesNotExist, Retailer.MultipleObjectsReturned):
                # No single matching retailer: skip the row, as before.
                # Other errors (database failures, Ctrl-C) now propagate
                # instead of being silently treated as "not found".
                continue
            print('found at %s' % retailer)
            win_obj = Win()
            win_obj.retailer = retailer
            win_obj.date = win['Date Won/Claimed']
            win_obj.amount = win['Prize']
            win_obj.game = win['Game Name']
            win_obj.save()
            print('saved')
Exemple #3
0
def build_graph():
    """Fill the graph ``g`` (defined outside this function) from graph_data.xlsx.

    All five sheets are read with ``xls_to_dicts``.  The faculty and topics
    rows go through ``add_nodes``.  Each affiliation row links its 'name' to
    its 'group' (creating a 'program group' node when the group is not yet
    in the graph) and stores the row's 'level' on the edge.  Each problem row
    links its 'start' to its 'target' (creating a 'topic' node when the
    target is not yet in the graph).  The methods sheet is read but not used
    here.
    """
    path = os.path.join("/Users/benjamin/projects/mitdusp/data/",
                        "graph_data.xlsx")

    # Sheets are read in this order, one xls_to_dicts call per sheet.
    sheet_names = ("faculty", "topics", "affiliations", "problems", "methods")
    tables = {}
    for sheet in sheet_names:
        tables[sheet] = xls_to_dicts(path, sheet)

    add_nodes(tables["faculty"])
    add_nodes(tables["topics"])

    for row in simpl(tables["affiliations"]):
        group = row['group']
        if group not in g:
            g.add_node(group, name=group, type='program group')
        g.add_edge(row['name'], group, level=row['level'])

    for row in simpl(tables["problems"]):
        target = row['target']
        if target not in g:
            g.add_node(target, name=target, type='topic')
        g.add_edge(row['start'], target)
Exemple #4
0
def repair_points():
    """Create and save a Location for every sales row, applying corrections.

    For each row of the sales spreadsheet the listed location is looked up
    by the row's 'address'.  If the address is not among the filtered
    locations, the corrections spreadsheet is searched with ``find_dict``
    (the row's 'street_address' against the 'sales_address' column) and the
    correction's 'listed_address' is used for the lookup instead, so the row
    is treated as found.

    Rows that have no correction are printed and skipped.
    """
    locations = read('filtered_ny_locations')
    sales = xls_to_dicts(sales_xls)
    corrections = xls_to_dicts(corrections_xls)
    for row in sales:
        add = row['address']
        street_add = row['street_address']
        if add in locations:
            location = locations[add]
        else:
            # deal with the broken ones: find the correction
            corrected = find_dict(street_add, corrections, 'sales_address')
            if not corrected:
                # Report the address and move on.  Previously execution fell
                # through to corrected['listed_address'] and crashed.
                print(add)
                continue
            location = locations[corrected['listed_address']]
        # make the point and location objects
        loc = Location()
        loc.point = Point(row['lng'], row['lat'])
        # use the address information from sales, not from the retailer
        # listings, because these are the addresses that were geocoded
        loc.address_text = add
        loc.raw_address_text = location['address']
        loc.street_address = street_add
        loc.city = row['city']
        loc.state = 'NY'
        loc.zipcode = int(row['zipcode'])
        # save the new location object
        loc.save()
Exemple #5
0
def repair_points():
    """Create and save a Location for every sales row, applying corrections.

    For each row of the sales spreadsheet the listed location is looked up
    by the row's 'address'.  If the address is not among the filtered
    locations, the corrections spreadsheet is searched with ``find_dict``
    (the row's 'street_address' against the 'sales_address' column) and the
    correction's 'listed_address' is used for the lookup instead, so the row
    is treated as found.

    Rows that have no correction are printed and skipped.
    """
    locations = read('filtered_ny_locations')
    sales = xls_to_dicts(sales_xls)
    corrections = xls_to_dicts(corrections_xls)
    for row in sales:
        add = row['address']
        street_add = row['street_address']
        if add in locations:
            location = locations[add]
        else:
            # deal with the broken ones: find the correction
            corrected = find_dict(street_add, corrections, 'sales_address')
            if not corrected:
                # Report the address and move on.  Previously execution fell
                # through to corrected['listed_address'] and crashed.
                print(add)
                continue
            location = locations[corrected['listed_address']]
        # make the point and location objects
        loc = Location()
        loc.point = Point(row['lng'], row['lat'])
        # use the address information from sales, not from the retailer
        # listings, because these are the addresses that were geocoded
        loc.address_text = add
        loc.raw_address_text = location['address']
        loc.street_address = street_add
        loc.city = row['city']
        loc.state = 'NY'
        loc.zipcode = int(row['zipcode'])
        # save the new location object
        loc.save()
Exemple #6
0
def newgraph():
    """Build the graph ``g`` (defined outside this function) from projects3.xls.

    People and topics become nodes keyed by ``idify(name)``, carrying every
    column of their row as attributes.  Each project becomes a node keyed by
    ``idify(project name)`` with 'name', 'description', 'type' and 'id'
    attributes, unless a node with that id already exists.

    For every project column whose idified name is a node and whose cell is
    'x', that node is linked to the project and to the project's person
    (the 'names' column).  The person is also linked directly to the
    project.  If the person is not a known node, "can't find" is printed and
    no person edges are added for that project.
    """
    path = os.path.join("data", "projects3.xls")
    projects = xls_to_dicts(path, "projects")
    people = xls_to_dicts(path, "people")
    topics = xls_to_dicts(path, "topics")
    # add the nodes to the graph
    # be sure to construct ids
    for person in people:
        person['id'] = idify(person['name'])
        g.add_node(person['id'], **person)
    for topic in topics:
        topic['id'] = idify(topic['name'])
        g.add_node(topic['id'], **topic)
    for project in projects:
        project_id = idify(project['name'])
        # Nodes are keyed by id, so test the id rather than the raw name.
        if project_id not in g:
            pcore = {
                'name': project['name'],
                'description': project['detail'],
                'type': project['type'],
                'id': project_id,
            }
            g.add_node(project_id, **pcore)
        # Check for the person BEFORE adding any edge that mentions them.
        # NOTE(review): assumes g is a networkx-style graph, where add_edge
        # creates missing endpoint nodes and would make a later check
        # pointless - confirm.
        person_id = idify(project['names'])
        person_known = person_id in g
        if not person_known:
            print("can't find %s" % (project["names"],))
        for k in project:
            column_id = idify(k)
            if column_id in g and project[k] == 'x':
                # link the marked column's node (e.g. a topic) to the project
                g.add_edge(column_id, project_id)
                if person_known:
                    # link the same node to the person
                    g.add_edge(column_id, person_id)
        if person_known:
            g.add_edge(person_id, project_id)
Exemple #7
0
def load_points():
    """Run Third

    This compares the filtered addresses to the previously geocoded points,
    in order to determine the lat lng of each location.  It simply records
    what was and was not found.  After this step it is necessary to correct
    the addresses that did not match.  The resulting corrections can be found
    in the file 'notfound_location_corrections.xls'.

    Matched locations get the sales row's 'lat' and 'lng' added in place.
    The found and not-found sets are passed to ``write`` and ``xls``, and
    the raw retailers are passed to ``xls`` as 'retailers.xls'.
    """
    # locations are the listed locations
    locations = read('filtered_ny_locations')

    # sales are the sales locations
    sales = xls_to_dicts(sales_xls)

    not_found = {}
    found = {}
    for row in sales:
        add = row['address']
        if add in locations:
            print('FOUND: %s' % add)
            locations[add]['lat'] = row['lat']
            locations[add]['lng'] = row['lng']
            found[add] = locations[add]
        else:
            print('NOT FOUND: %s' % add)
            not_found[add] = {
                'address': add,
                'street_address': row['street_address'],
                'city': row['city'],
                'state': row['state'],
                'zipcode': row['zipcode'],
                'name': row['name'],
            }
    write('found_ny_locations', found)
    write('notfound_ny_locations', not_found)

    xloc = [locations[k] for k in locations]
    # Column list for the combined sheet: the first location's keys plus
    # lat/lng.  list() works on both Python 2 and 3, and an empty
    # locations set no longer raises IndexError.
    keys = list(xloc[0]) if xloc else []
    for extra in ('lat', 'lng'):
        # The first location may already carry lat/lng if it was matched
        # above; do not list those columns twice.
        if extra not in keys:
            keys.append(extra)
    xfound = [found[k] for k in found]
    xnot_found = [not_found[k] for k in not_found]
    xls('all_locations.xls', xloc, keys)
    xls('found_ny_locations.xls', xfound)
    xls('notfound_ny_locations.xls', xnot_found)

    r = read('raw_ny_retailers')
    sellers = [r[k] for k in r]
    xls('retailers.xls', sellers)
Exemple #8
0
def load_points():
    """Run Third

    This compares the filtered addresses to the previously geocoded points,
    in order to determine the lat lng of each location.  It simply records
    what was and was not found.  After this step it is necessary to correct
    the addresses that did not match.  The resulting corrections can be found
    in the file 'notfound_location_corrections.xls'.

    Matched locations get the sales row's 'lat' and 'lng' added in place.
    The found and not-found sets are passed to ``write`` and ``xls``, and
    the raw retailers are passed to ``xls`` as 'retailers.xls'.
    """
    # locations are the listed locations
    locations = read('filtered_ny_locations')

    # sales are the sales locations
    sales = xls_to_dicts(sales_xls)

    not_found = {}
    found = {}
    for row in sales:
        add = row['address']
        if add in locations:
            print('FOUND: %s' % add)
            locations[add]['lat'] = row['lat']
            locations[add]['lng'] = row['lng']
            found[add] = locations[add]
        else:
            print('NOT FOUND: %s' % add)
            not_found[add] = {
                'address': add,
                'street_address': row['street_address'],
                'city': row['city'],
                'state': row['state'],
                'zipcode': row['zipcode'],
                'name': row['name'],
            }
    write('found_ny_locations', found)
    write('notfound_ny_locations', not_found)

    xloc = [locations[k] for k in locations]
    # Column list for the combined sheet: the first location's keys plus
    # lat/lng.  list() works on both Python 2 and 3, and an empty
    # locations set no longer raises IndexError.
    keys = list(xloc[0]) if xloc else []
    for extra in ('lat', 'lng'):
        # The first location may already carry lat/lng if it was matched
        # above; do not list those columns twice.
        if extra not in keys:
            keys.append(extra)
    xfound = [found[k] for k in found]
    xnot_found = [not_found[k] for k in not_found]
    xls('all_locations.xls', xloc, keys)
    xls('found_ny_locations.xls', xfound)
    xls('notfound_ny_locations.xls', xnot_found)

    r = read('raw_ny_retailers')
    sellers = [r[k] for k in r]
    xls('retailers.xls', sellers)