Example #1
0
def repair_sales():
    """Link NYC sales rows to a retailer and save each one as a SalesWeek.

    Every row of every file in ``sales_files`` is read with
    ``csv_dictionaries``. Rows whose zipcode is not in ``nyc_zips`` are
    ignored. For the remaining rows:

    1. The street address is passed through ``filter_address`` and an
       address key is built with ``address_key``.
    2. ``Location`` objects are filtered on ``address_text``. Unless exactly
       one location matches, the row is collected as a bad address.
    3. That location's retailers are filtered on the row's name. Unless
       exactly one retailer matches, the row is collected as a bad name.
    4. Otherwise a ``SalesWeek`` is saved with that retailer, the row's
       amount and the row's date parsed as ``%Y-%m-%d``.

    The collected failures are handed to ``xls`` under the names
    'sales-bad_address.xls' and 'sales-bad_names.xls'. Collected rows carry
    the modifications made in this function (filtered street address, float
    amount, added 'state' key).
    """
    unmatched_addresses = []
    unmatched_names = []
    columns = ('name', 'street_address', 'city', 'zipcode', 'date', 'amount')

    for source in sales_files:
        for record in csv_dictionaries(source, columns):
            # Only NYC rows are processed.
            # NOTE(review): the zipcode is converted with int() before the
            # membership test, so nyc_zips presumably holds integers -- verify.
            if int(record['zipcode']) not in nyc_zips:
                continue

            # Bring the row's address into the form used for location keys.
            record['street_address'] = filter_address(record['street_address'])
            record['amount'] = float(record['amount'])
            record['state'] = 'NY'
            lookup_key = address_key(record, 'street_address')

            # Exactly one location must match the address key.
            candidates = Location.objects.filter(address_text=lookup_key)
            if len(candidates) != 1:
                unmatched_addresses.append(record)
                continue

            # Exactly one retailer at that location must carry the name.
            named = candidates[0].retailer_set.filter(name=record['name'])
            if len(named) != 1:
                unmatched_names.append(record)
                continue

            # Unambiguous match: record the sale for that retailer.
            sale = SalesWeek()
            sale.retailer = named[0]
            sale.amount = record['amount']
            sale.week = datetime.datetime.strptime(record['date'], '%Y-%m-%d')
            sale.save()

    # Report the rows that could not be matched.
    xls('sales-bad_address.xls', unmatched_addresses)
    xls('sales-bad_names.xls', unmatched_names)
Example #2
0
def repair_sales():
    """Match sales data to retailers and store the matches as SalesWeek rows.

    Each file in ``sales_files`` is read with ``csv_dictionaries``; only rows
    whose zipcode is in ``nyc_zips`` are considered. A row's filtered street
    address is turned into a key with ``address_key`` and used to filter
    ``Location`` objects on ``address_text``. When exactly one location is
    found, its retailers are filtered on the row's name. When exactly one
    retailer is found, a ``SalesWeek`` holding that retailer, the amount and
    the date (parsed as ``%Y-%m-%d``) is saved.

    Rows without exactly one matching location, or without exactly one
    matching retailer, are passed to ``xls`` as 'sales-bad_address.xls' and
    'sales-bad_names.xls' respectively. Those rows include the changes made
    here: filtered street address, float amount and an added 'state' key.
    """
    address_failures = []
    name_failures = []
    field_names = ('name', 'street_address', 'city', 'zipcode', 'date',
                   'amount')

    for sales_source in sales_files:
        for entry in csv_dictionaries(sales_source, field_names):
            # Skip everything outside the NYC zip codes.
            # NOTE(review): int() is applied before the lookup, so nyc_zips
            # presumably contains integers -- confirm.
            if int(entry['zipcode']) not in nyc_zips:
                continue

            # Prepare the row so its address key can be built.
            entry['street_address'] = filter_address(entry['street_address'])
            entry['amount'] = float(entry['amount'])
            entry['state'] = 'NY'
            location_key = address_key(entry, 'street_address')

            # The address must identify a single location.
            found_locations = Location.objects.filter(address_text=location_key)
            if len(found_locations) != 1:
                address_failures.append(entry)
                continue

            # The name must identify a single retailer at that location.
            site = found_locations[0]
            found_retailers = site.retailer_set.filter(name=entry['name'])
            if len(found_retailers) != 1:
                name_failures.append(entry)
                continue

            # Build and save the sales week for the matched retailer.
            week_start = datetime.datetime.strptime(entry['date'], '%Y-%m-%d')
            sales_week = SalesWeek()
            sales_week.retailer = found_retailers[0]
            sales_week.amount = entry['amount']
            sales_week.week = week_start
            sales_week.save()

    # Hand the unmatched rows over for the failure reports.
    xls('sales-bad_address.xls', address_failures)
    xls('sales-bad_names.xls', name_failures)
Example #3
0
def load_locations():
    """Run first: extract NYC retailers and locations from the agent directory.

    Reads the rows of ``agents`` with ``csv_dictionaries`` and keeps only
    those whose 'BUSZIP' is in ``nyc_zips``. For each kept row it builds:

    * a location dict (street address, city, state 'NY', zipcode, plus an
      'address' entry holding the full key from ``address_key``), stored
      under that full key -- the first row seen for a key wins;
    * a retailer dict (name, retailer_id, location key), stored under the
      agent number -- a later row with the same agent number replaces the
      earlier one.

    Both mappings are passed to ``write`` as 'raw_ny_retailers' and
    'raw_ny_locations'. The addresses do not necessarily match addresses
    that have been previously geocoded.
    """
    # NOTE(review): the original carried the remark "load these into django
    # models and save them"; this function only calls write() -- presumably
    # the model loading happens elsewhere, confirm.
    retailers_by_agent = {}
    locations_by_address = {}

    for entry in csv_dictionaries( agents ):
        zipcode = entry['BUSZIP']
        # NOTE(review): the zip is compared exactly as read from the CSV (no
        # int() conversion) -- confirm nyc_zips holds values of that type.
        if zipcode not in nyc_zips:
            continue

        place = {
                'street_address': entry['BUSADDR'],
                'city': entry['BUSCITY'],
                'state': 'NY',
                'zipcode': zipcode,
                }
        full_address = address_key( place, 'street_address' )
        place['address'] = full_address

        # Locations are keyed by the full raw address; keep the first seen.
        locations_by_address.setdefault( full_address, place )

        # Retailers are keyed by agent number and remember their address key.
        shop = {
                'name': entry['BUSNM'],
                'retailer_id': entry['AGTNO'],
                'location': full_address,
                }
        retailers_by_agent[ shop['retailer_id'] ] = shop

    write( 'raw_ny_retailers', retailers_by_agent )
    write( 'raw_ny_locations', locations_by_address )
Example #4
0
def load_locations():
    """Run first: collect NYC locations and retailers from the agent file.

    Rows come from ``csv_dictionaries(agents)``; rows whose 'BUSZIP' is not
    in ``nyc_zips`` are skipped. From each remaining row two records are
    derived:

    * a location (street address, city, state 'NY', zipcode and the full
      address key produced by ``address_key``), kept under that full key
      only if the key has not been seen before;
    * a retailer (name, retailer_id and the location's full key), kept
      under the agent number, overwriting any earlier entry for it.

    The results are passed to ``write`` under the names 'raw_ny_retailers'
    and 'raw_ny_locations'. The addresses do not necessarily match the
    addresses that have been previously geocoded.
    """
    # NOTE(review): the original def line was annotated "load these into
    # django models and save them", but only write() is called here --
    # presumably a later step does the model loading; confirm.
    known_retailers = {}
    known_locations = {}

    for listing in csv_dictionaries(agents):
        zip_value = listing['BUSZIP']
        # NOTE(review): no int() conversion is applied before this test --
        # verify that nyc_zips stores zips in the same type the CSV yields.
        if zip_value not in nyc_zips:
            continue

        site = {
            'street_address': listing['BUSADDR'],
            'city': listing['BUSCITY'],
            'state': 'NY',
            'zipcode': zip_value,
        }
        site_key = address_key(site, 'street_address')
        site['address'] = site_key

        # First occurrence of a full raw address defines the location.
        known_locations.setdefault(site_key, site)

        # Retailers are indexed by agent number and keep the raw address key.
        vendor = {
            'name': listing['BUSNM'],
            'retailer_id': listing['AGTNO'],
            'location': site_key,
        }
        known_retailers[vendor['retailer_id']] = vendor

    write('raw_ny_retailers', known_retailers)
    write('raw_ny_locations', known_locations)