def read_csv(source_csv, city_name, city_tag, driver):
    city_options = City.objects.filter(tag=city_tag)
    print "Number of cities available: %s" % len(city_options)
    if not len(city_options):
        raise ValueError, "CITY NOT FOUND! run make_cities.py first"
        ## city = City()
        ## city.name = city_name
        ## city.tag = to_tag(city.name)
        ## city.save()
    else:
        city = city_options[0]

    print city

    position_file = "position.json"
    position = load_json(position_file, create=True)
    if not position:
        position = 0

    cache_file = "%s-20150525.json.bkup" % city.tag
    cache_destination = os.path.join(os.path.dirname(source_csv), cache_file)
    #keep a local copy of data we've processed...
    #this should help with subsequent calls
    #to make sure we don't need to duplicate calls to remote geolocation APIs:
    local_cache = load_json(cache_destination, create=True)
    if not local_cache.has_key('buildings'):
        local_cache['buildings'] = {}
    
    search_results = {}
    for key, value in local_cache['buildings'].items():
        #search_results[key] = Location(value)
        sr = SearchResults()
        sr.from_dict(value)
        #print
        #print sr
        #print 
        search_results[key] = sr

    #geocoder helper:
    #geo = Geo()

    provider = ''
    provider_options = ServiceProvider.objects.filter(name='City of Columbia')
    if len(provider_options):
        provider = provider_options[0]
    else:
        raise ValueError, "error finding utility_provider: %s matches" % len(provider_options)                    


    skips = 0
    with open(source_csv) as csvfile:

        reader = unicode_csv_reader(csvfile)

        #just print the first row:
        print '>, <'.join(reader.next())

        count = 0

        #want to randomize the order... distribute options more evenly
        #print len(reader)
        #exit()
        #in order to randomize, should randomize the order in the csv
        for row in reader:
            count += 1
            print "Looking at row: %s, position: %s" % (count, position)
            start = datetime.now()
            print "Started: ", start
            
            any_updated = False
            
            #could exit out early here, if needed
            if count > 10:
                #exit()
                pass

            #if you want to skip ahead more quickly:
            #if count < 0:
            if count < position:
                pass
            else:

                #print row
                objectid = row[0]


                ## no_units = row[12]


                #can pass this in as bldg_id to make_building
                #that gets used for parcel too
                parcel_id = row[1]
                bldg_id = parcel_id

                street_num = row[2]
                street_dir = row[3]
                street_name = row[4]
                street_sfx = row[5]
                #eg building number
                qualifier_pre = row[6]
                #eg "UNIT" or "APT"
                qualifier_post = row[7]
                apt_num = row[8]
                #skip row9 (in/out... whatever that means)
                zip_code = row[10]
                #skip row11, assessor id
                #skip row12, address num
                #skip row13, x
                #skip row14, y
                #xcoord == lng
                lng = row[15]
                lat = row[16]

                #entry floor number: (named 'z' in sheet)
                floor = row[17]

                #skip row18, strcid... not sure
                #skip row19, parent
                #skip row20, app_
                #skip row21, hteloc
                zone = row[22]
                bldg_type = row[23]
                #number of buildings
                bldg_num = row[24]
                no_units = row[25]

                #skip row[26], inspection type
                #skip row27, app number
                #skip row28, date received
                #skip row29, application type
                #skip row30, ownerid
                #skip row31, operator id
                #skip row32, agent_id
                #skip row33, mail to
                central_heat = row[34]
                if central_heat == 'Y':
                    central_heat = True
                else:
                    central_heat = False

                #heat mechanism? heat mechanic??? not sure
                heat_mech = row[35]
                #skip row36, agent id (2)
                #skip row37, agent last name
                #skip row38 agent first name
                #skip row39 agent middle initial
                #skip row40, agent title
                #skip row41, business name

                #could be owner, could be agent
                ## owner_name = row[42]
                ## owner_address1 = row[43]
                ## owner_address2 = row[44]
                ## owner_city = row[45]
                ## owner_state = row[46]
                ## owner_zip = row[47]


                #address = " ".join([street_num, street_dir, street_name, street_sfx, qualifier_pre, qualifier_post, apt_num])

                #this is causing problems with lookups in google
                if qualifier_pre == "DUP" or qualifier_pre == "DUPE" or qualifier_pre == "2-Jan" or qualifier_pre == "HM" or qualifier_pre == "DWN":
                    qualifier_pre = ''

                address_main = " ".join([street_num, street_dir, street_name, street_sfx, qualifier_pre])
                address_main = address_main.strip()
                #get rid of any double spaces
                address_main = address_main.replace("  ", " ")

                #similar to conversions,
                #but there are too many of these to list there
                if re.search('HOLLY RIDGE LN', address_main):
                    address_main = address_main.replace('HOLLY RIDGE LN', 'HOLLYRIDGE LN')
                if re.search('BERKSHIRE CT', address_main):
                    address_main = address_main.replace('BERKSHIRE CT', 'BERKSHIRE')
                    #address_main = ''
                if re.search('CAMERON CT', address_main):
                    address_main = address_main.replace('CAMERON CT', 'CAMERON')
                    #address_main = ''
                if re.search('ATHENS CT', address_main):
                    address_main = address_main.replace('ATHENS CT', 'ATHENS')
                    #address_main = ''
                if re.search('LAMAR CT', address_main):
                    address_main = address_main.replace('LAMAR CT', 'LAMAR')
                    #address_main = ''
                if re.search('MONITEAU CT', address_main):
                    address_main = address_main.replace('MONITEAU CT', 'MONITEAU')
                    #address_main = ''
                if re.search('IMPERIAL CT', address_main):
                    address_main = ''
                if re.search('PERKINS DR', address_main):
                    address_main = ''
                if re.search('GRANITE OAKS CT', address_main):
                    address_main = ''

                    

                #sometimes the 'BLDG' data is added in the wrong place
                #then it gets treated as a unit item
                #(but it's not *always* a unit item, so can't generalize it that way)
                if qualifier_post == "BLDG" or qualifier_post == "LOT":
                    address_main = " ".join([address_main, qualifier_post, apt_main])
                    address_main = address_main.strip()
                    apt_main = ''
                else:
                    apt_main = " ".join([qualifier_post, apt_num])
                    apt_main = apt_main.strip()

                #check if this is one we want to skip
                if conversions.has_key(address_main.upper()):
                    address_main = conversions[address_main.upper()]

                if address_main:
                    print "APT_MAIN: ", apt_main
                    address = ", ".join( [address_main, apt_main] )


                ## if (not status in ['EXPIRED', 'CLOSED']) and (permit_type in ['RENTAL']):

                print "Parcel ID:", parcel_id
                print address

                results = None

                #make sure it's not one we're skipping:
                if not address:
                    print "SKIPPING ITEM: %s" % row[1]
                    skips += 1

                    ## skips = codecs.open("skips.txt", 'a', encoding='utf-8')
                    ## original = " ".join([street_num, street_dir, street_name, street_sfx, qualifier_pre])
                    ## skips.write(original)
                    ## skips.write('\n')
                    ## skips.close()
                    
                #check if we've started processing any results for this row
                elif not search_results.has_key(address.upper()):
                    print "No saved search results for address: %s" % address
                    print "Skipping."
                    print
                    #raise ValueError, "No results found for %s" % address

                else:
                    
                    print "Already had building: %s" % address
                    results = search_results[address.upper()]

                    assert results
                    #print results

                    lookup_building_with_geo(results, make=True, parcel_id=parcel_id)
                    #print results
                    #current['results'] = results

                    #print results

                    if results.errors:
                        print results
                        raise ValueError, results.errors
                    else:

                        bldg = results.building
                        assert bldg
                        unit = results.unit

                        #at this point there should be at least one unit
                        #and we will want to associate results with that unit
                        #assert unit
                        # can just pass this up in this case

                        if not unit:
                            print "Skipping address... no matching Unit!"

                        else:


                            #now that we have a building
                            #look up energy data on the remote website

                            #result = urllib2.urlopen("http://example.com/foo/bar")
                            #print result.read()

                            ## base = "http://www.gocolumbiamo.com/cfforms/ub/rental.html"
                            ## driver.get(base)
                            ## search = driver.find_element_by_css_selector('#address')
                            ## search.send_keys(address)
                            ## button = driver.find_element_by_css_selector('.ui-bar > a:nth-child(2)')
                            ## #button = driver.find_element_by_css_selector('#PrimaryCenterColumn > div > div.ui-bar-b.ui-header > div > a.ui-btn.ui-btn-corner-all.ui-shadow.ui-btn-down-b.ui-btn-up-b')
                            ## #button = driver.find_element_by_css_selector('#PrimaryCenterColumn > div > div.ui-bar-b.ui-header > div > a.ui-btn.ui-btn-corner-all.ui-shadow.ui-btn-down-b.ui-btn-up-b > span > span')
                            ## button.click()
                            ## time.sleep(4)

                            ## #results = driver.find_element_by_css_selector('.dojoxGridMasterView')
                            ## results = driver.find_element_by_css_selector('.dojoxGridContent > div:nth-child(1)')
                            ## print results.get_attribute('innerHTML')
                            ## print parcel_id

                            ## options = results.find_elements_by_tag_name('div')
                            ## #options = results.find_elements_by_link_text(parcel_id)
                            ## print options
                            ## #something didn't work with this:
                            ## #look_for = '<td tabindex="-1" role="gridcell" colspan="1" class="dojoxGridCell" idx="0" style="width:90px;">%s</td>' % parcel_id
                            ## look_for = '>%s<' % parcel_id

                            ## matches = []
                            ## for option in options:
                            ##     markup = option.get_attribute('innerHTML')
                            ##     #print markup
                            ##     if re.search(look_for, markup):
                            ##         matches.append(option)
                            ##         #print "MATCH!"

                            ## if len(matches) > 1:
                            ##     print matches
                            ##     raise ValueError, "Too many matches!"
                            ## else:
                            ##     matches[0].click()


                            #just realized that this form uses the property_id
                            #which we already have...
                            #can skip the steps above that are trying to make this link:

                            base = "http://www.gocolumbiamo.com/cfforms/ub/ubdata.cfm?LOCID=%s&AppNum=79" % parcel_id
                            driver.get(base)

                            try:
                                heat_source = driver.find_element_by_css_selector('#PrimaryCenterColumn > table:nth-child(1) > tbody:nth-child(1) > tr:nth-child(3) > td:nth-child(1) > strong:nth-child(1) > font:nth-child(1)')
                                if heat_source.text.strip() == "Heating Source: Gas Heat":
                                    bldg.heat_source_details = 'gas'
                                    bldg.save()
                                else:
                                    print heat_source.text
                                    exit()
                                    #TODO:
                                    bldg.heat_source_details = 'electric'
                                    bldg.who_pays_gas = 'not_available'
                            except:
                                print "heat source not found... skipping"
                                    
                            try:
                                selector = driver.find_element_by_css_selector('#el_table_length > label:nth-child(1) > select:nth-child(1) > option:nth-child(3)')
                                selector.click()
                            except:
                                print "No Water data available... skipping"
                            else:

                                body = driver.find_element_by_css_selector('#el_table > tbody:nth-child(3)')
                                rows = body.find_elements_by_tag_name('tr')
                                #row = rows[0]
                                query = bldg.utilitysummary_set.filter(type='electricity')
                                for row in rows:
                                    #print row.get_attribute('innerHTML')
                                    cols = row.find_elements_by_tag_name('td')
                                    date = cols[0].text + '-01'
                                    cost = cols[1].text.replace('$', '').strip()
                                    amount = cols[2].text
                                    amount = amount.replace(' KWH', '')
                                    update_summary(query, date, cost, amount, bldg, unit, provider, 'electricity', 'kwh')
                                    #update_summary(query, date, cost, amount)
                                    #for item in cols:
                                    #    print item.text


                            #print dir(bldg)
                            #print bldg.utilitysummary_set
                            #query = bldg.utilitysummary_set.filter(type=utility_type[0])
                            #could look up type from UTILITY_TYPES...
                            #but in this case we know what they should be
                            #query = bldg.utilitysummary_set.filter(type='water')
                            #if len(query):

                            try:
                                water = driver.find_element_by_css_selector('#ext-gen23')
                                water.click()

                                selector = driver.find_element_by_css_selector('#wr_table_length > label:nth-child(1) > select:nth-child(1) > option:nth-child(3)')
                                selector.click()
                            except:
                                print "No Water data available... skipping"
                            else:

                                body = driver.find_element_by_css_selector('#wr_table > tbody:nth-child(3)')

                                rows = body.find_elements_by_tag_name('tr')
                                #row = rows[0]
                                query = bldg.utilitysummary_set.filter(type='water')
                                for row in rows:
                                    #print row.get_attribute('innerHTML')
                                    cols = row.find_elements_by_tag_name('td')
                                    date = cols[0].text + '-01'
                                    cost = cols[1].text.replace('$', '').strip()
                                    amount = cols[2].text
                                    amount = amount.replace(' CCF', '')
                                    update_summary(query, date, cost, amount, bldg, unit, provider, 'water', 'ccf')
                                    #update_summary(query, date, cost, amount)
                                    #for item in cols:
                                    #    print item.text


                            unit.update_averages()

                            #see if we have enough info now to make a score:
                            unit.update_energy_score()

                            #now that we've saved the unit,
                            #update the averages for the whole building:
                            unit.building.update_utility_averages()
                            unit.building.update_rent_details()

                
                position += 1
                save_json(position_file, position)
        
            if any_updated:
                #back it up for later
                #enable this when downloading GPS coordinates...
                #the rest of the time it slows things down
                local_cache['buildings'] = {}
                for key, value in search_results.items():
                    #search_results[key] = SearchResults().from_dict(value)
                    local_cache['buildings'][key] = value.to_dict()
                save_json(cache_destination, local_cache)

                position = count
                save_json(position_file, position)
                exit()

            end = datetime.now()
            print "finished: ", end
            total_time = end - start
            print total_time

            print
コード例 #2
0
def read_csv(source_csv, city_tag, feed_date):
    #could also use city.models.find_by_city_state
    city_options = City.objects.filter(tag=city_tag)
    #print "Number of cities available: %s" % len(city_options)
    if not len(city_options):
        raise ValueError, "CITY NOT FOUND! run make_cities.py first"
    else:
        city = city_options[0]

    print city


    feeds = FeedInfo.objects.filter(city=city).filter(added=feed_date)
    if feeds.exists():
        feed = feeds[0]
        print "Already had feed: %s, %s" % (feed.city, feed.added)
    else:
        feed = FeedInfo()
        feed.city = city
        feed.added = feed_date
        feed.version = "0.1"
        feed.save()
        print "Created new feed: %s" % feed.city.name

    people = Person.objects.filter(name="Blank")
    if people.exists():
        person = people[0]
        print "Already had person: %s" % (person.name)
    else:
        person = Person()
        person.name = "Blank"
        person.save()
        print "Created new person: %s" % person.name

    sources = Source.objects.filter(feed=feed)
    if sources.exists():
        feed_source = sources[0]
        print "Already had source: %s, %s" % (feed_source.feed.city, feed_source.feed.added)
    else:
        feed_source = Source()
        feed_source.feed = feed
        feed_source.person = person
        feed_source.save()
        print "Created new source: %s" % feed_source.feed.city.name


    # ideally, should be able to use the database itself as the cache,
    # instead of using a local file
    # but it's also good to not have to repeat geo queries if going in bulk
    # the site code *will* make geo queries
    # so it's still a good idea to cache the coded address locally
    # even if using the site code for everything else.
    
    cache_file = "%s.json" % city.tag
    #print cache_file
    cache_destination = os.path.join(os.path.dirname(source_csv), cache_file)
    print cache_destination
    #keep a local copy of data we've processed...
    #this should help with subsequent calls
    #to make sure we don't need to duplicate calls to remote geolocation APIs:
    loaded_cache = load_json(cache_destination, create=True)

    #need to go through and load SearchResults separately
    local_cache = {}
    for key in loaded_cache.keys():
        #this is useful if there is a cached value 
        #that was not parsed correctly... this will remove it:
        #if key.strip() == "314 North Washington Street Apt. C":
        if key.strip() == "some address with bad cached data":
            print "not adding: ", key
            #exit()
            pass
        else:
            current = loaded_cache[key]
            results = current['results']
            #print results
            sr = SearchResults()
            #sr.from_dict(results, debug=True)
            sr.from_dict(results, debug=False)
            #print sr
            current['results'] = sr

            #print current['results']
            local_cache[key] = current
        
    #use street address as the key
    #for each address, store SearchResults object

    #reset skips for every run:
    skips = codecs.open("skips.txt", 'w', encoding='utf-8')
    skips.close()


    skips = 0
    #with codecs.open(source_csv, 'rb', encoding='utf-8') as csvfile:
    with open(source_csv) as csvfile:

        #reader = csv.reader(csvfile, delimiter=' ', quotechar='|')
        #reader = csv.reader(csvfile)
        #reader = unicodecsv.UnicodeReader(csvfile, encoding='utf-8')
        reader = unicode_csv_reader(csvfile)

        #just print the first row:
        print '>, <'.join(reader.next())
        print

        keys = []
        for item in reader.next():
            key = item.lower().strip()
            key = key.replace('(', '')
            key = key.replace(')', '')
            key = key.replace('-', '_')
            key = key.replace('.', '')
            key = key.replace('/ ', '')
            key = key.replace('/', '_')
            key = key.replace('"', '')
            key = key.replace('#', 'num')
            key = key.replace(' ', '_')
            keys.append(key)
        
        #*and* the second row in this case
        print '>, <'.join(keys)

        #currently:
        #<street_address>, <unit_if_applicable>, <unit_type>, <rent>, <security_deposit>, <sq_feet_per_unit>, <num_bedrooms>, <num_bathrooms>, <maximum_occupancy_per_unit>, <lease_period>, <availability>, <laundry>, <parking>, <air_conditioning>, <pets>, <gym_fitness_center>, <game_room_rec_center_community_center>, <pool>, <other_amenities>, <bike_friendly>, <recycling>, <composting>, <gardening>, <public_transit>, <walk_friendly>, <other_smartliving_features>, <who_pays_for_electricity>, <who_pays_for_natural_gas>, <who_pays_for_water>, <who_pays_for_trash_recycling_pickup>, <who_pays_for_telephone_land_line>, <who_pays_for_cable>, <who_pays_for_internet>, <electricity_provider>, <electric_utility_cost_average_per_mo>, <electric_utility_cost_low>, <electric_utility_cost_high>, <natural_gas_provider>, <natural_gas_utility_cost_average_per_mo>, <natural_gas_utility_cost_low>, <natural_gas_utility_cost_high>, <energy_saving_features>, <utility_info_source>, <agent_property_manager>, <property_website_url>, <agent_property_manager_address>, <agent_property_manager_phone>, <owner>, <comments>

        #exit()

        count = 0
        #start = 6439
        start = 0

        #if you want to randomize the order... to distribute options more evenly
        #just do this in the original spreadsheet.
        #in order to randomize, should randomize the order in the csv
        for row in reader:

            current = {}
            count += 1
            print "Looking at row: %s" % count
            
            #could exit out early here, if needed (for testing)
            if count > 7220:
                #all_done(cache_destination, local_cache)
                pass

            if count >= start:

                address = process_row(current, row, keys, local_cache, city, feed_source, count)
            
                print

                local_cache[address] = current
                #save every time...
                #never know when a crash will happen:
                #however, this does make things run considerably slower
                #especially once the cached file size grows.
                #save_results(cache_destination, local_cache)

                #exit()
            
    all_done(cache_destination, local_cache)
コード例 #3
0
def read_csv(source_csv, city_name, city_tag):
    city_options = City.objects.filter(tag=city_tag)
    print "Number of cities available: %s" % len(city_options)
    if not len(city_options):
        raise ValueError, "CITY NOT FOUND! run make_cities.py first"
        ## city = City()
        ## city.name = city_name
        ## city.tag = to_tag(city.name)
        ## city.save()
    else:
        city = city_options[0]

    print city

    feed_date = "2013-10-16"

    feeds = FeedInfo.objects.filter(city=city).filter(added=feed_date)
    if feeds.exists():
        feed = feeds[0]
        print "Already had feed: %s, %s" % (feed.city, feed.added)
    else:
        feed = FeedInfo()
        feed.city = city
        feed.added = feed_date
        feed.version = "0.1"
        feed.save()
        print "Created new feed: %s" % feed.city.name

    people = Person.objects.filter(name="Blank")
    if people.exists():
        person = people[0]
        print "Already had person: %s" % (person.name)
    else:
        person = Person()
        person.name = "Blank"
        person.save()
        print "Created new person: %s" % person.name

    sources = Source.objects.filter(feed=feed)
    if sources.exists():
        feed_source = sources[0]
        print "Already had source: %s, %s" % (feed_source.feed.city, feed_source.feed.added)
    else:
        feed_source = Source()
        feed_source.feed = feed
        feed_source.person = person
        feed_source.save()
        print "Created new source: %s" % feed_source.feed.city.name

    cache_file = "%s-20150525.json" % city.tag
    cache_destination = os.path.join(os.path.dirname(source_csv), cache_file)
    # keep a local copy of data we've processed...
    # this should help with subsequent calls
    # to make sure we don't need to duplicate calls to remote geolocation APIs:
    local_cache = load_json(cache_destination, create=True)
    if not local_cache.has_key("buildings"):
        local_cache["buildings"] = {}

    search_results = {}
    for key, value in local_cache["buildings"].items():
        # search_results[key] = Location(value)
        sr = SearchResults()
        sr.from_dict(value)
        # print
        # print sr
        # print
        search_results[key] = sr

    # geocoder helper:
    # geo = Geo()

    skips = 0
    with open(source_csv) as csvfile:

        reader = unicode_csv_reader(csvfile)

        # just print the first row:
        print ">, <".join(reader.next())

        count = 0

        # want to randomize the order... distribute options more evenly
        # print len(reader)
        # exit()
        # in order to randomize, should randomize the order in the csv
        for row in reader:
            count += 1
            print "Looking at row: %s" % count

            any_updated = False

            # could exit out early here, if needed
            if count > 10:
                # exit()
                pass

            # if you want to skip ahead more quickly:
            if count < 27187:
                pass
            else:

                # print row
                objectid = row[0]

                ## no_units = row[12]

                # can pass this in as bldg_id to make_building
                # that gets used for parcel too
                parcel_id = row[1]
                bldg_id = parcel_id

                street_num = row[2]
                street_dir = row[3]
                street_name = row[4]
                street_sfx = row[5]
                # eg building number
                qualifier_pre = row[6]
                # eg "UNIT" or "APT"
                qualifier_post = row[7]
                apt_num = row[8]
                # skip row9 (in/out... whatever that means)
                zip_code = row[10]
                # skip row11, assessor id
                # skip row12, address num
                # skip row13, x
                # skip row14, y
                # xcoord == lng
                lng = row[15]
                lat = row[16]

                # entry floor number: (named 'z' in sheet)
                floor = row[17]

                # skip row18, strcid... not sure
                # skip row19, parent
                # skip row20, app_
                # skip row21, hteloc
                zone = row[22]
                bldg_type = row[23]
                # number of buildings
                bldg_num = row[24]
                no_units = row[25]

                # skip row[26], inspection type
                # skip row27, app number
                # skip row28, date received
                # skip row29, application type
                # skip row30, ownerid
                # skip row31, operator id
                # skip row32, agent_id
                # skip row33, mail to
                central_heat = row[34]
                if central_heat == "Y":
                    central_heat = True
                else:
                    central_heat = False

                # heat mechanism? heat mechanic??? not sure
                heat_mech = row[35]
                # skip row36, agent id (2)
                # skip row37, agent last name
                # skip row38 agent first name
                # skip row39 agent middle initial
                # skip row40, agent title
                # skip row41, business name

                # could be owner, could be agent
                owner_name = row[42]
                owner_address1 = row[43]
                owner_address2 = row[44]
                owner_city = row[45]
                owner_state = row[46]
                owner_zip = row[47]

                # address = " ".join([street_num, street_dir, street_name, street_sfx, qualifier_pre, qualifier_post, apt_num])

                # this is causing problems with lookups in google
                if (
                    qualifier_pre == "DUP"
                    or qualifier_pre == "DUPE"
                    or qualifier_pre == "2-Jan"
                    or qualifier_pre == "HM"
                    or qualifier_pre == "DWN"
                ):
                    qualifier_pre = ""

                address_main = " ".join([street_num, street_dir, street_name, street_sfx, qualifier_pre])
                address_main = address_main.strip()
                # get rid of any double spaces
                address_main = address_main.replace("  ", " ")

                # similar to conversions,
                # but there are too many of these to list there
                if re.search("HOLLY RIDGE LN", address_main):
                    address_main = address_main.replace("HOLLY RIDGE LN", "HOLLYRIDGE LN")
                if re.search("BERKSHIRE CT", address_main):
                    address_main = address_main.replace("BERKSHIRE CT", "BERKSHIRE")
                    # address_main = ''
                if re.search("CAMERON CT", address_main):
                    address_main = address_main.replace("CAMERON CT", "CAMERON")
                    # address_main = ''
                if re.search("ATHENS CT", address_main):
                    address_main = address_main.replace("ATHENS CT", "ATHENS")
                    # address_main = ''
                if re.search("LAMAR CT", address_main):
                    address_main = address_main.replace("LAMAR CT", "LAMAR")
                    # address_main = ''
                if re.search("MONITEAU CT", address_main):
                    address_main = address_main.replace("MONITEAU CT", "MONITEAU")
                    # address_main = ''
                if re.search("IMPERIAL CT", address_main):
                    address_main = ""
                if re.search("PERKINS DR", address_main):
                    address_main = ""
                if re.search("GRANITE OAKS CT", address_main):
                    address_main = ""

                # sometimes the 'BLDG' data is added in the wrong place
                # then it gets treated as a unit item
                # (but it's not *always* a unit item, so can't generalize it that way)
                if qualifier_post == "BLDG" or qualifier_post == "LOT":
                    address_main = " ".join([address_main, qualifier_post, apt_main])
                    address_main = address_main.strip()
                    apt_main = ""
                else:
                    apt_main = " ".join([qualifier_post, apt_num])
                    apt_main = apt_main.strip()

                # check if this is one we want to skip
                if conversions.has_key(address_main.upper()):
                    address_main = conversions[address_main.upper()]

                if address_main:
                    print "APT_MAIN: ", apt_main
                    address = ", ".join([address_main, apt_main])
                else:
                    address = ""

                owner_address = ", ".join([owner_address1, owner_address2, owner_city, owner_state, owner_zip])

                ## if (not status in ['EXPIRED', 'CLOSED']) and (permit_type in ['RENTAL']):

                print "Parcel ID:", parcel_id
                print address

                results = None

                # make sure it's not one we're skipping:
                if not address:
                    print "SKIPPING ITEM: %s" % row[1]
                    skips += 1

                    skipf = codecs.open("skips.txt", "a", encoding="utf-8")
                    original = " ".join([street_num, street_dir, street_name, street_sfx, qualifier_pre])
                    skipf.write(original)
                    skipf.write("\n")
                    skipf.close()

                else:
                    # check if we've started processing any results for this row
                    if search_results.has_key(address.upper()):
                        print "Already had building: %s" % address
                        results = search_results[address.upper()]
                        # print results
                    else:

                        addy = ", ".join([address_main, city.name, city.state])
                        addy += " " + zip_code
                        # addy += ", USA"
                        print addy

                        # toggle betweeen an actual google query
                        results = address_search(addy, apt_main)

                        # print dir(results)

                        if len(results.matches) > 1:
                            print results
                            for option in results.matches:
                                print "%s: %s, %s" % (option["place"], option["lat"], option["lng"])
                            print
                            print "Source Lat: %s, Lng: %s" % (lat, lng)
                            src_lat = int(float(lat) * 100)
                            src_lng = int(float(lng) * 100)

                            matched = False
                            for current in results.matches:
                                # current = results.matches[0]
                                print current["lat"]
                                print current["lng"]
                                # only want to look at the first 2 decimal places:
                                comp_lat = int(float(current["lat"]) * 100)
                                comp_lng = int(float(current["lng"]) * 100)
                                print comp_lat
                                print comp_lng

                                if (src_lat == comp_lat) and (src_lng == comp_lng):
                                    # results.matches = results.matches[:1]
                                    results.matches = [current]
                                    matched = True

                            if not matched:
                                print "DIDN'T MATCH!"
                                exit()

                        any_updated = True

                        # or just using results as specified in csv
                        # (THIS DOES NOT NORMALIZE THE ADDRESS VIA GOOGLE)
                        # results = SearchResults()
                        # results.unit_text = apt_main
                        # handle_place(results, addy, lat, lng, apt_main)

                    assert results
                    # print results

                    lookup_building_with_geo(results, make=True, parcel_id=parcel_id)
                    # print results
                    # current['results'] = results

                    # print results

                    if results.errors:
                        print results
                        raise ValueError, results.errors
                    else:

                        search_results[address.upper()] = results

                        bldg = results.building
                        assert bldg
                        unit = results.unit

                        # may be a case where the unit is blank
                        # and another unit with an number/letter was created earlier
                        # in that case, we won't be creating one here
                        # and the building will already exist...
                        # not necessarily an error though
                        # just redundant data
                        # assert unit

                        (person, bldg_person) = make_person(owner_name, bldg, "Agent", address=owner_address)

                    # time.sleep(1)

            if any_updated:
                # back it up for later
                # enable this when downloading GPS coordinates...
                # the rest of the time it slows things down
                local_cache["buildings"] = {}
                for key, value in search_results.items():
                    # search_results[key] = SearchResults().from_dict(value)
                    local_cache["buildings"][key] = value.to_dict()
                save_json(cache_destination, local_cache)

            print