Python Source Examples

Programming Language: Python

Namespace/Package Name: source.models

Class/Type: Source

Examples at hotexamples.com: 13

Python Source - 13 examples found. These are the top-rated real-world Python examples of source.models.Source, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

Source(2)

create_sources(1)

get_sources(1)

Example #1

Show file

def upload_code(request):
    """
    Save the submitted Java source for the session user, run it, and
    render 'source/index.html' with the source, the result and the user.

    Reads the 'code' POST field and the 'user' session entry.  When the
    code is missing or blank, a small demo class is used instead.

    Raises KeyError if the session has no 'user' entry.
    """
    # A missing field is treated like an empty submission so the demo
    # code is shown instead of failing on the lookup.
    source = request.POST.get('code', '')
    user = request.session['user']

    if not source.strip():
        source = '''
/* Java demo code */
public class Test {
    public static Object execute(){
        return new java.util.Random().nextInt();
    }
}
'''
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(source)
    print(user)

    sourceModel = Source(user=user, source=source)
    sourceModel.save()

    # SECURITY NOTE(review): `source` comes straight from the request and
    # is handed to java_compile_and_execute(); confirm that helper
    # sandboxes the code it runs.
    return direct_to_template(request, 'source/index.html',
            {'source' : source,
             'result' : java_compile_and_execute(source),
             'login' : user
    })

Example #2

Show file

    def update(self, dict_values, lang=None):
        """
        Save this object, then update every complex field that has an
        entry in dict_values.

        dict_values is keyed by field.get_field_str_id(); each entry is a
        dict with a 'value', optionally 'sources', and - for sourced
        fields - a 'confidence'.

        lang defaults to the language returned by get_language() at the
        time of the call.
        """
        if lang is None:
            # Resolved per call: a get_language() default in the signature
            # is evaluated only once, when the function is defined.
            lang = get_language()

        self.save()
        for field in self.complex_fields:
            field_name = field.get_field_str_id()
            if field_name not in dict_values:
                continue

            field_values = dict_values[field_name]
            sources = Source.create_sources(field_values.get('sources', []))

            if field.sourced:
                sources = {
                    'confidence': field_values['confidence'],
                    'sources': sources,
                }
                field.update(field_values['value'], lang, sources)
            else:
                field.update(field_values['value'], lang)

Example #3

Show file

File: base_models.py Project: caravancoop/complex_fields

    def update(self, dict_values, lang=None):
        """
        Save this object and push new values into its complex fields.

        For each field in self.complex_fields whose string id
        (field.get_field_str_id()) is a key of dict_values, the entry's
        'value' is passed to field.update().  Sourced fields also receive
        a dict holding the entry's 'confidence' and the sources created
        from its optional 'sources' list.

        lang defaults to get_language() evaluated when the method is
        called.
        """
        if lang is None:
            # A default of get_language() in the signature would be
            # computed once at definition time, not per call.
            lang = get_language()

        self.save()
        for field in self.complex_fields:
            field_name = field.get_field_str_id()
            if field_name not in dict_values:
                continue

            entry = dict_values[field_name]
            sources = Source.create_sources(
                entry.get('sources', [])
            )

            if field.sourced:
                sources = {
                    'confidence': entry['confidence'],
                    'sources': sources,
                }
                field.update(entry['value'], lang, sources)
            else:
                field.update(entry['value'], lang)

Example #4

Show file

File: models.py Project: caravancoop/complex_fields

    def get_history_for_lang(self, lang=None):
        """
        Return the revision history of this field for one language.

        Looks up the first field_model row that references
        self.table_object in the given language and returns one dict per
        revision with the keys "value", "sources" and "id".  Returns an
        empty list when no such row exists.

        lang defaults to get_language() evaluated when the method is
        called.
        """
        if lang is None:
            # A default of get_language() in the signature would be
            # computed once at definition time, not per call.
            lang = get_language()

        c_fields = self.field_model.objects.filter(object_ref=self.table_object)
        c_fields = c_fields.filter(lang=lang)
        field = list(c_fields[:1])
        if not field:
            return []

        return [
            {
                "value": fh.field_dict['value'],
                "sources": [
                    src.source
                    for src in Source.get_sources(fh.field_dict['sources'])
                ],
                "id": fh.id
            }
            for fh in reversion.get_for_object(field[0])
        ]

Example #5

Show file

File: import_master_template.py Project: City-of-Bloomington/rentrocket

def read_csv(source_csv, city_tag, feed_date):
    """
    Import the rows of source_csv for the city tagged city_tag.

    Looks up the City by tag (ValueError if none exists), then finds or
    creates the FeedInfo for (city, feed_date), a Person named "Blank"
    and the Source attached to that feed.

    A JSON cache named "<city.tag>.json", stored next to source_csv, is
    loaded and the 'results' of each cached entry are rebuilt as a
    SearchResults object.

    The first CSV row is printed, the second row is normalized into the
    column keys, and every remaining row is handed to process_row().
    The `current` dict passed to process_row() (presumably filled in by
    it) is cached under the returned address.  all_done() is called with
    the cache path and the cache once all rows are processed.
    """
    #could also use city.models.find_by_city_state
    city_options = City.objects.filter(tag=city_tag)
    #print "Number of cities available: %s" % len(city_options)
    if not len(city_options):
        raise ValueError, "CITY NOT FOUND! run make_cities.py first"
    else:
        city = city_options[0]

    print city


    # find or create the feed for this city and date
    feeds = FeedInfo.objects.filter(city=city).filter(added=feed_date)
    if feeds.exists():
        feed = feeds[0]
        print "Already had feed: %s, %s" % (feed.city, feed.added)
    else:
        feed = FeedInfo()
        feed.city = city
        feed.added = feed_date
        feed.version = "0.1"
        feed.save()
        print "Created new feed: %s" % feed.city.name

    # find or create the placeholder person used for new sources
    people = Person.objects.filter(name="Blank")
    if people.exists():
        person = people[0]
        print "Already had person: %s" % (person.name)
    else:
        person = Person()
        person.name = "Blank"
        person.save()
        print "Created new person: %s" % person.name

    # find or create the source that ties imported data to this feed
    sources = Source.objects.filter(feed=feed)
    if sources.exists():
        feed_source = sources[0]
        print "Already had source: %s, %s" % (feed_source.feed.city, feed_source.feed.added)
    else:
        feed_source = Source()
        feed_source.feed = feed
        feed_source.person = person
        feed_source.save()
        print "Created new source: %s" % feed_source.feed.city.name


    # ideally, should be able to use the database itself as the cache,
    # instead of using a local file
    # but it's also good to not have to repeat geo queries if going in bulk
    # the site code *will* make geo queries
    # so it's still a good idea to cache the coded address locally
    # even if using the site code for everything else.
    
    cache_file = "%s.json" % city.tag
    #print cache_file
    cache_destination = os.path.join(os.path.dirname(source_csv), cache_file)
    print cache_destination
    #keep a local copy of data we've processed...
    #this should help with subsequent calls
    #to make sure we don't need to duplicate calls to remote geolocation APIs:
    loaded_cache = load_json(cache_destination, create=True)

    #need to go through and load SearchResults separately
    local_cache = {}
    for key in loaded_cache.keys():
        #this is useful if there is a cached value 
        #that was not parsed correctly... this will remove it:
        #if key.strip() == "314 North Washington Street Apt. C":
        if key.strip() == "some address with bad cached data":
            print "not adding: ", key
            #exit()
            pass
        else:
            current = loaded_cache[key]
            results = current['results']
            #print results
            sr = SearchResults()
            #sr.from_dict(results, debug=True)
            sr.from_dict(results, debug=False)
            #print sr
            current['results'] = sr

            #print current['results']
            local_cache[key] = current
        
    #use street address as the key
    #for each address, store SearchResults object

    #reset skips for every run:
    #(opening in 'w' mode and closing right away empties skips.txt)
    skips = codecs.open("skips.txt", 'w', encoding='utf-8')
    skips.close()


    # NOTE(review): `skips` is rebound to a counter here but is not read
    # again in this function.
    skips = 0
    #with codecs.open(source_csv, 'rb', encoding='utf-8') as csvfile:
    with open(source_csv) as csvfile:

        #reader = csv.reader(csvfile, delimiter=' ', quotechar='|')
        #reader = csv.reader(csvfile)
        #reader = unicodecsv.UnicodeReader(csvfile, encoding='utf-8')
        reader = unicode_csv_reader(csvfile)

        #just print the first row:
        print '>, <'.join(reader.next())
        print

        # the second row holds the column names; normalize each one into
        # a lowercase, underscore-separated key
        keys = []
        for item in reader.next():
            key = item.lower().strip()
            key = key.replace('(', '')
            key = key.replace(')', '')
            key = key.replace('-', '_')
            key = key.replace('.', '')
            key = key.replace('/ ', '')
            key = key.replace('/', '_')
            key = key.replace('"', '')
            key = key.replace('#', 'num')
            key = key.replace(' ', '_')
            keys.append(key)
        
        #*and* the second row in this case
        print '>, <'.join(keys)

        #currently:
        #<street_address>, <unit_if_applicable>, <unit_type>, <rent>, <security_deposit>, <sq_feet_per_unit>, <num_bedrooms>, <num_bathrooms>, <maximum_occupancy_per_unit>, <lease_period>, <availability>, <laundry>, <parking>, <air_conditioning>, <pets>, <gym_fitness_center>, <game_room_rec_center_community_center>, <pool>, <other_amenities>, <bike_friendly>, <recycling>, <composting>, <gardening>, <public_transit>, <walk_friendly>, <other_smartliving_features>, <who_pays_for_electricity>, <who_pays_for_natural_gas>, <who_pays_for_water>, <who_pays_for_trash_recycling_pickup>, <who_pays_for_telephone_land_line>, <who_pays_for_cable>, <who_pays_for_internet>, <electricity_provider>, <electric_utility_cost_average_per_mo>, <electric_utility_cost_low>, <electric_utility_cost_high>, <natural_gas_provider>, <natural_gas_utility_cost_average_per_mo>, <natural_gas_utility_cost_low>, <natural_gas_utility_cost_high>, <energy_saving_features>, <utility_info_source>, <agent_property_manager>, <property_website_url>, <agent_property_manager_address>, <agent_property_manager_phone>, <owner>, <comments>

        #exit()

        count = 0
        #start = 6439
        # raise `start` to resume a partial run; 0 processes every row
        start = 0

        #if you want to randomize the order... to distribute options more evenly
        #just do this in the original spreadsheet.
        #in order to randomize, should randomize the order in the csv
        for row in reader:

            current = {}
            count += 1
            print "Looking at row: %s" % count
            
            #could exit out early here, if needed (for testing)
            #(currently a no-op: the early exit is commented out)
            if count > 7220:
                #all_done(cache_destination, local_cache)
                pass

            if count >= start:

                address = process_row(current, row, keys, local_cache, city, feed_source, count)
            
                print

                local_cache[address] = current
                #save every time...
                #never know when a crash will happen:
                #however, this does make things run considerably slower
                #especially once the cached file size grows.
                #save_results(cache_destination, local_cache)

                #exit()
            
    all_done(cache_destination, local_cache)

Example #6

Show file

File: convert-evanston.py Project: City-of-Bloomington/rentrocket

def read_csv(source_csv, city_name, city_tag):
    """
    Import building records for one city from source_csv and geocode them.

    Looks up the City by city_tag (ValueError if none exists), then finds
    or creates the FeedInfo dated "2013-07-31", a Person named "Blank"
    and the Source for that feed.  Previously processed buildings are
    loaded from "<city.tag>.json" next to source_csv.

    The first CSV row is printed and not processed.  For every other row
    the address (column 0, with the trailing number moved to the front),
    the building id (column 1), the invoice note (column 6) and the unit
    count (column 12) are read.  The address is looked up with the
    "google" and "bing" geocoders, the building (and, when there is an
    invoice note, a "Permit Applicant" person) is stored, and the JSON
    cache is rewritten whenever something changed.

    Rows whose address is mapped to an empty value by `conversions` are
    counted and skipped.  At the end all locations are passed to
    save_results() with the destination "<city_tag>.tsv".
    """
    city_options = City.objects.filter(tag=city_tag)
    print("Number of cities available: %s" % len(city_options))
    if not len(city_options):
        raise ValueError("CITY NOT FOUND! run make_cities.py first")
    city = city_options[0]

    print(city)

    feed_date = "2013-07-31"

    # find or create the feed for this city and date
    feeds = FeedInfo.objects.filter(city=city).filter(added=feed_date)
    if feeds.exists():
        feed = feeds[0]
        print("Already had feed: %s, %s" % (feed.city, feed.added))
    else:
        feed = FeedInfo()
        feed.city = city
        feed.added = feed_date
        feed.version = "0.1"
        feed.save()
        print("Created new feed: %s" % feed.city.name)

    # find or create the placeholder person used for new sources
    people = Person.objects.filter(name="Blank")
    if people.exists():
        person = people[0]
        print("Already had person: %s" % (person.name))
    else:
        person = Person()
        person.name = "Blank"
        person.save()
        print("Created new person: %s" % person.name)

    # find or create the source that ties imported data to this feed
    sources = Source.objects.filter(feed=feed)
    if sources.exists():
        feed_source = sources[0]
        print("Already had source: %s, %s" % (feed_source.feed.city, feed_source.feed.added))
    else:
        feed_source = Source()
        feed_source.feed = feed
        feed_source.person = person
        feed_source.save()
        print("Created new source: %s" % feed_source.feed.city.name)

    cache_file = "%s.json" % city.tag
    cache_destination = os.path.join(os.path.dirname(source_csv), cache_file)
    # keep a local copy of data we've processed so later runs do not have
    # to repeat calls to remote geolocation APIs
    local_cache = load_json(cache_destination, create=True)
    if 'buildings' not in local_cache:
        local_cache['buildings'] = {}
    if 'parcels' not in local_cache:
        local_cache['parcels'] = {}

    locations = {}
    for key, value in local_cache['buildings'].items():
        locations[key] = Location(value)

    # geocoder helper:
    geo = Geo()

    skips = 0
    with open(source_csv) as csvfile:
        reader = unicode_csv_reader(csvfile)

        # just print the first row:
        print('>, <'.join(next(reader)))

        count = 0
        for row in reader:
            count += 1
            print("Looking at row: %s" % count)

            address = row[0]

            # need to fix the number being at the end of the address
            parts = address.split(',')
            anumber = parts[-1]
            street = ",".join(parts[:-1])
            address = "%s %s" % (anumber, street)

            bldg_id = row[1]
            print(bldg_id)

            # this is where owner is stored
            invoice_note = row[6]
            print(invoice_note)
            if re.match('Sent to:', invoice_note):
                print("changing invoice note from: %s" % invoice_note)
                invoice_note = invoice_note[8:]
                print("to: %s" % invoice_note)
            else:
                print("!!!!!invoice note does not start with Sent to!!!!!")
                print("")
                print("")

            no_units = row[12]

            # `conversions` may rewrite an address, or blank it out to
            # mark the row as one to skip
            if address.upper() in conversions:
                address = conversions[address.upper()]

            if not address:
                print("SKIPPING ITEM: %s" % row[1])
                skips += 1
                print("")
                # Skipped rows must not fall through: `location` would be
                # undefined on the first row, or left over from the
                # previous row and then stored under the wrong address.
                continue

            # check if we've started processing any results for this row
            if address.upper() in locations:
                location = locations[address.upper()]
            else:
                location = Location()

            # temporarily just want to look at google and bing again
            location.sources = ["google", "bing"]

            # do some geocoding, as needed:
            search = "%s, %s, %s" % (address.upper(), city_name, city.state)

            any_updated = False
            for geo_source in location.sources:
                update = geo.lookup(search, geo_source, location, force=True)
                if update:
                    any_updated = True

            location.sources = ["google", "bing", "usgeo", "geonames", "openmq", "mq"]

            # this is the case for brand new searches
            # (which are updated in a different sense)
            if not hasattr(location, "address_alt") or not location.address_alt:
                any_updated = True

            location.address_alt = search
            locations[address.upper()] = location

            # handle the database storage
            bldg = make_building(location, bldg_id, city, feed_source, no_units=no_units)

            if invoice_note:
                (person, bldg_person) = make_person(invoice_note, bldg, "Permit Applicant")

            if any_updated:
                # back it up for later
                # enable this when downloading GPS coordinates...
                # the rest of the time it slows things down
                local_cache['buildings'] = {}
                for key, value in locations.items():
                    local_cache['buildings'][key] = value.to_dict()
                save_json(cache_destination, local_cache)

            print("")

    destination = '%s.tsv' % city_tag
    save_results(locations, destination)

Example #7

Show file

File: source_block.py Project: clampist/anime_chaser

def source_block(target):
    """
    Build the context for the source block of `target`: the target
    itself, an unbound SourceForm and the sources found for the target.
    """
    context = {'target': target}
    context['source_form'] = SourceForm()
    context['source_list'] = Source.get_by_target(target)
    return context

Example #8

Show file

File: convert-evanston.py Project: codeforbtv/green-rental

def read_csv(source_csv, city_name, city_tag):
    """
    Import building records for one city from source_csv and geocode them.

    Looks up the City by city_tag (ValueError if none exists), then finds
    or creates the FeedInfo dated "2013-07-31", a Person named "Blank"
    and the Source for that feed.  Previously processed buildings are
    loaded from "<city.tag>.json" next to source_csv.

    The first CSV row is printed and not processed.  For every other row
    the address (column 0, with the trailing number moved to the front),
    the building id (column 1), the invoice note (column 6) and the unit
    count (column 12) are read.  The address is looked up with the
    "google" and "bing" geocoders, the building (and, when there is an
    invoice note, a "Permit Applicant" person) is stored, and the JSON
    cache is rewritten whenever something changed.

    Rows whose address is mapped to an empty value by `conversions` are
    counted and skipped.  At the end all locations are passed to
    save_results() with the destination "<city_tag>.tsv".
    """
    city_options = City.objects.filter(tag=city_tag)
    print("Number of cities available: %s" % len(city_options))
    if not len(city_options):
        raise ValueError("CITY NOT FOUND! run make_cities.py first")
    city = city_options[0]

    print(city)

    feed_date = "2013-07-31"

    # find or create the feed for this city and date
    feeds = FeedInfo.objects.filter(city=city).filter(added=feed_date)
    if feeds.exists():
        feed = feeds[0]
        print("Already had feed: %s, %s" % (feed.city, feed.added))
    else:
        feed = FeedInfo()
        feed.city = city
        feed.added = feed_date
        feed.version = "0.1"
        feed.save()
        print("Created new feed: %s" % feed.city.name)

    # find or create the placeholder person used for new sources
    people = Person.objects.filter(name="Blank")
    if people.exists():
        person = people[0]
        print("Already had person: %s" % (person.name))
    else:
        person = Person()
        person.name = "Blank"
        person.save()
        print("Created new person: %s" % person.name)

    # find or create the source that ties imported data to this feed
    sources = Source.objects.filter(feed=feed)
    if sources.exists():
        feed_source = sources[0]
        print("Already had source: %s, %s" % (feed_source.feed.city,
                                              feed_source.feed.added))
    else:
        feed_source = Source()
        feed_source.feed = feed
        feed_source.person = person
        feed_source.save()
        print("Created new source: %s" % feed_source.feed.city.name)

    cache_file = "%s.json" % city.tag
    cache_destination = os.path.join(os.path.dirname(source_csv), cache_file)
    # keep a local copy of data we've processed so later runs do not have
    # to repeat calls to remote geolocation APIs
    local_cache = load_json(cache_destination, create=True)
    if 'buildings' not in local_cache:
        local_cache['buildings'] = {}
    if 'parcels' not in local_cache:
        local_cache['parcels'] = {}

    locations = {}
    for key, value in local_cache['buildings'].items():
        locations[key] = Location(value)

    # geocoder helper:
    geo = Geo()

    skips = 0
    with open(source_csv) as csvfile:
        reader = unicode_csv_reader(csvfile)

        # just print the first row:
        print('>, <'.join(next(reader)))

        count = 0
        for row in reader:
            count += 1
            print("Looking at row: %s" % count)

            address = row[0]

            # need to fix the number being at the end of the address
            parts = address.split(',')
            anumber = parts[-1]
            street = ",".join(parts[:-1])
            address = "%s %s" % (anumber, street)

            bldg_id = row[1]
            print(bldg_id)

            # this is where owner is stored
            invoice_note = row[6]
            print(invoice_note)
            if re.match('Sent to:', invoice_note):
                print("changing invoice note from: %s" % invoice_note)
                invoice_note = invoice_note[8:]
                print("to: %s" % invoice_note)
            else:
                print("!!!!!invoice note does not start with Sent to!!!!!")
                print("")
                print("")

            no_units = row[12]

            # `conversions` may rewrite an address, or blank it out to
            # mark the row as one to skip
            if address.upper() in conversions:
                address = conversions[address.upper()]

            if not address:
                print("SKIPPING ITEM: %s" % row[1])
                skips += 1
                print("")
                # Skipped rows must not fall through: `location` would be
                # undefined on the first row, or left over from the
                # previous row and then stored under the wrong address.
                continue

            # check if we've started processing any results for this row
            if address.upper() in locations:
                location = locations[address.upper()]
            else:
                location = Location()

            # temporarily just want to look at google and bing again
            location.sources = ["google", "bing"]

            # do some geocoding, as needed:
            search = "%s, %s, %s" % (address.upper(), city_name, city.state)

            any_updated = False
            for geo_source in location.sources:
                update = geo.lookup(search, geo_source, location, force=True)
                if update:
                    any_updated = True

            location.sources = [
                "google", "bing", "usgeo", "geonames", "openmq", "mq"
            ]

            # this is the case for brand new searches
            # (which are updated in a different sense)
            if not hasattr(location,
                           "address_alt") or not location.address_alt:
                any_updated = True

            location.address_alt = search
            locations[address.upper()] = location

            # handle the database storage
            bldg = make_building(location,
                                 bldg_id,
                                 city,
                                 feed_source,
                                 no_units=no_units)

            if invoice_note:
                (person, bldg_person) = make_person(invoice_note, bldg,
                                                    "Permit Applicant")

            if any_updated:
                # back it up for later
                # enable this when downloading GPS coordinates...
                # the rest of the time it slows things down
                local_cache['buildings'] = {}
                for key, value in locations.items():
                    local_cache['buildings'][key] = value.to_dict()
                save_json(cache_destination, local_cache)

            print("")

    destination = '%s.tsv' % city_tag
    save_results(locations, destination)

Example #9

Show file

File: convert-columbia.py Project: codeforbtv/green-rental

def read_csv(source_csv, city_name, city_tag):
    city_options = City.objects.filter(tag=city_tag)
    print "Number of cities available: %s" % len(city_options)
    if not len(city_options):
        raise ValueError, "CITY NOT FOUND! run make_cities.py first"
        ## city = City()
        ## city.name = city_name
        ## city.tag = to_tag(city.name)
        ## city.save()
    else:
        city = city_options[0]

    print city

    feed_date = "2013-10-16"

    feeds = FeedInfo.objects.filter(city=city).filter(added=feed_date)
    if feeds.exists():
        feed = feeds[0]
        print "Already had feed: %s, %s" % (feed.city, feed.added)
    else:
        feed = FeedInfo()
        feed.city = city
        feed.added = feed_date
        feed.version = "0.1"
        feed.save()
        print "Created new feed: %s" % feed.city.name

    people = Person.objects.filter(name="Blank")
    if people.exists():
        person = people[0]
        print "Already had person: %s" % (person.name)
    else:
        person = Person()
        person.name = "Blank"
        person.save()
        print "Created new person: %s" % person.name

    sources = Source.objects.filter(feed=feed)
    if sources.exists():
        feed_source = sources[0]
        print "Already had source: %s, %s" % (feed_source.feed.city, feed_source.feed.added)
    else:
        feed_source = Source()
        feed_source.feed = feed
        feed_source.person = person
        feed_source.save()
        print "Created new source: %s" % feed_source.feed.city.name


    cache_file = "%s.json" % city.tag
    cache_destination = os.path.join(os.path.dirname(source_csv), cache_file)
    #keep a local copy of data we've processed...
    #this should help with subsequent calls
    #to make sure we don't need to duplicate calls to remote geolocation APIs:
    local_cache = load_json(cache_destination, create=True)
    if not local_cache.has_key('buildings'):
        local_cache['buildings'] = {}
    if not local_cache.has_key('parcels'):
        local_cache['parcels'] = {}
    
    locations = {}
    for key, value in local_cache['buildings'].items():
        locations[key] = Location(value)

    #geocoder helper:
    geo = Geo()

    skips = 0
    #with codecs.open(source_csv, 'rb', encoding='utf-8') as csvfile:
    with open(source_csv) as csvfile:
        #reader = csv.reader(csvfile, delimiter=' ', quotechar='|')
        #reader = csv.reader(csvfile)
        #reader = unicodecsv.UnicodeReader(csvfile, encoding='utf-8')

        reader = unicode_csv_reader(csvfile)

        #just print the first row:
        print '>, <'.join(reader.next())

        count = 0

        #want to randomize the order... distribute options more evenly
        #print len(reader)
        #exit()
        #in order to randomize, should randomize the order in the csv
        for row in reader:
            count += 1
            print "Looking at row: %s" % count
            
            #could exit out early here, if needed
            if count > 10:
                #exit()
                pass

            print row
            address = row[0]


            ## no_units = row[12]


            #can pass this in as bldg_id to make_building
            #that gets used for parcel too
            parcel_id = row[1]
            bldg_id = parcel_id

            street_num = row[2]
            street_dir = row[3]
            street_name = row[4]
            street_sfx = row[5]
            #eg building number
            qualifier_pre = row[6]
            #eg "UNIT" or "APT"
            qualifier_post = row[7]
            apt_num = row[8]
            #skip row9 (in/out... whatever that means)
            zip_code = row[10]
            #skip row11, assessor id
            #skip row12, address num
            #skip row13, x
            #skip row14, y
            #xcoord == lng
            lng = row[15]
            lat = row[16]

            #entry floor number: (named 'z' in sheet)
            floor = row[17]

            #skip row18, strcid... not sure
            #skip row19, parent
            #skip row20, app_
            #skip row21, hteloc
            zone = row[22]
            bldg_type = row[23]
            #number of buildings
            bldg_num = row[24]
            no_units = row[25]

            #skip row[26], inspection type
            #skip row27, app number
            #skip row28, date received
            #skip row29, application type
            #skip row30, ownerid
            #skip row31, operator id
            #skip row32, agent_id
            #skip row33, mail to
            central_heat = row[34]
            if central_heat == 'Y':
                central_heat = True
            else:
                central_heat = False

            #heat mechanism? heat mechanic??? not sure
            heat_mech = row[35]
            #skip row36, agent id (2)
            #skip row37, agent last name
            #skip row38 agent first name
            #skip row39 agent middle initial
            #skip row40, agent title
            #skip row41, business name

            #could be owner, could be agent
            owner_name = row[42]
            owner_address1 = row[43]
            owner_address2 = row[44]
            owner_city = row[45]
            owner_state = row[46]
            owner_zip = row[47]

            
            #address = " ".join([street_num, street_dir, street_name, street_sfx, qualifier_pre, qualifier_post, apt_num])

            address_main = " ".join([street_num, street_dir, street_name, street_sfx, qualifier_pre])
            address_main = address_main.strip()
            #get rid of any double spaces
            address_main = address_main.replace("  ", " ")
            
            apt_main = " ".join([qualifier_post, apt_num])
            apt_main = apt_main.strip()

            address = address_main
            print address

            owner_address = ", ".join([owner_address1, owner_address2, owner_city, owner_state, owner_zip])
            
            ## #should always be "RENTAL" (don't need to track this one)
            ## permit_type = row[1]
            ## if not permit_type == "RENTAL" and not permit_type == "MECHANICAL":
            ##     raise ValueError, "Unexpected permit type: %s in row: %s" % (
            ##         permit_type, row)
            
            ## bldg_type = row[2]
            
            ## #can use this to filter out non-rental or obsolete entries
            ## #don't need to track otherwise:
            ## status = row[3]
            ## parcel_id = row[4]

            ## #should be fixed per source:
            ## ss_city = row[6]

            ## bldg_sf = row[7]
            ## no_bldgs = row[8]
            ## applicant_name = row[9]
            ## no_stories = row[10]
            ## no_units = row[11]

            ## sqft = row[7]
            ## number_of_buildings = row[8]
            ## applicant_name = row[9]
            ## number_of_stories = row[10]
            ## number_of_units = row[11]
            
            #check if this is one we want to skip
            if conversions.has_key(address.upper()):
                address = conversions[address.upper()]

            ## if (not status in ['EXPIRED', 'CLOSED']) and (permit_type in ['RENTAL']):

            #make sure it's not one we're skipping:
            if not address:
                print "SKIPPING ITEM: %s" % row[1]
                skips += 1
            else:
                #check if we've started processing any results for this row
                if locations.has_key(address.upper()):
                    location = locations[address.upper()]
                else:
                    location = Location()

            #temporarily just want to look at google again
            #location.sources = ["google"]
            #location.sources = ["google", "bing"]
            #location.sources = ["google", "bing", "usgeo", "geonames", "openmq"]
            #skip geocoding for columbia
            location.sources = []
            
            #do some geocoding, as needed:
            search = "%s, %s, %s" % (address.upper(), city_name, city.state)

            any_updated = False
            for geo_source in location.sources:
                update = geo.lookup(search, geo_source, location, force=True)
                #update = geo.lookup(search, geo_source, location, force=False)
                if update:
                    any_updated = True

            location.sources = ['csv', "google", "bing", "usgeo", "geonames", "openmq", "mq"]

            #manually add data from csv here:
            result = []
            result.append({'place': address, 'lat': lat, 'lng': lng})
            setattr(location, 'csv', result)

            #this is the case for brand new searches
            #(which are updated in a different sense)
            if not hasattr(location, "address_alt") or not location.address_alt:
                any_updated = True

            location.address_alt = search
            #location.bldg_units = bldg_units
            #location.units_bdrms = units_bdrms
            locations[address.upper()] = location

            #handle the database storage
            bldg = make_building(location, bldg_id, city, feed_source, no_units=no_units, bldg_type=bldg_type)

            if apt_main:
                unit = make_unit(apt_main, bldg)

            (person, bldg_person) = make_person(owner_name, bldg, "Agent", address=owner_address)


            if any_updated:
                #back it up for later
                #enable this when downloading GPS coordinates...
                #the rest of the time it slows things down
                local_cache['buildings'] = {}
                for key, value in locations.items():
                    local_cache['buildings'][key] = value.to_dict()
                save_json(cache_destination, local_cache)

            print

            #exit()
            
    destination = '%s.tsv' % city_tag
    save_results(locations, destination)

Example #10

Show file

File: convert-columbia.py Project: codeforbtv/green-rental

def read_csv(source_csv, city_name, city_tag):
    """Load building, unit and owner records from a rental CSV export.

    Every data row is turned into a street address (optionally rewritten
    through the module-level ``conversions`` table), matched against a JSON
    cache stored in the same directory as ``source_csv``, and persisted via
    ``make_building``, ``make_unit`` and ``make_person``.  Coordinates are
    copied from the CSV itself; the remote geocoding source list is left
    empty for this data set.  Once all rows are handled, the collected
    locations are passed to ``save_results`` as ``<city_tag>.tsv``.

    Args:
        source_csv: path of the CSV file to read.
        city_name: city name used when building the search string.
        city_tag: tag of an existing ``City``; also names the TSV output.

    Raises:
        ValueError: if no ``City`` with ``city_tag`` exists.
    """
    city_options = City.objects.filter(tag=city_tag)
    city_count = len(city_options)
    print("Number of cities available: %s" % city_count)
    if not city_count:
        raise ValueError("CITY NOT FOUND! run make_cities.py first")
    city = city_options[0]

    print(city)

    feed_date = "2013-10-16"

    # Reuse the feed / placeholder person / source rows when they exist,
    # otherwise create them.
    feeds = FeedInfo.objects.filter(city=city).filter(added=feed_date)
    if feeds.exists():
        feed = feeds[0]
        print("Already had feed: %s, %s" % (feed.city, feed.added))
    else:
        feed = FeedInfo()
        feed.city = city
        feed.added = feed_date
        feed.version = "0.1"
        feed.save()
        print("Created new feed: %s" % feed.city.name)

    people = Person.objects.filter(name="Blank")
    if people.exists():
        person = people[0]
        print("Already had person: %s" % (person.name))
    else:
        person = Person()
        person.name = "Blank"
        person.save()
        print("Created new person: %s" % person.name)

    sources = Source.objects.filter(feed=feed)
    if sources.exists():
        feed_source = sources[0]
        print("Already had source: %s, %s" % (feed_source.feed.city,
                                              feed_source.feed.added))
    else:
        feed_source = Source()
        feed_source.feed = feed
        feed_source.person = person
        feed_source.save()
        print("Created new source: %s" % feed_source.feed.city.name)

    cache_file = "%s.json" % city.tag
    cache_destination = os.path.join(os.path.dirname(source_csv), cache_file)
    # Keep a local copy of data we've processed so subsequent runs do not
    # need to repeat calls to remote geolocation APIs.
    local_cache = load_json(cache_destination, create=True)
    if 'buildings' not in local_cache:
        local_cache['buildings'] = {}
    if 'parcels' not in local_cache:
        local_cache['parcels'] = {}

    locations = {}
    for key, value in local_cache['buildings'].items():
        locations[key] = Location(value)

    # geocoder helper:
    geo = Geo()

    skips = 0
    with open(source_csv) as csvfile:
        reader = unicode_csv_reader(csvfile)

        # just print the first (header) row:
        print('>, <'.join(next(reader)))

        for count, row in enumerate(reader, 1):
            print("Looking at row: %s" % count)
            print(row)

            # Columns used from each row (others are ignored):
            #   1       parcel id (doubles as bldg_id for make_building)
            #   2-5     street number / direction / name / suffix
            #   6       qualifier before the unit, eg building number
            #   7-8     unit qualifier (eg "UNIT" or "APT") and unit number
            #   15-16   x coordinate (lng) and lat
            #   23      building type
            #   25      number of units
            #   42-47   owner (or agent) name and mailing address parts
            bldg_id = row[1]

            street_num = row[2]
            street_dir = row[3]
            street_name = row[4]
            street_sfx = row[5]
            qualifier_pre = row[6]
            qualifier_post = row[7]
            apt_num = row[8]

            lng = row[15]
            lat = row[16]

            bldg_type = row[23]
            no_units = row[25]

            # could be owner, could be agent
            owner_name = row[42]
            owner_address = ", ".join(row[43:48])

            address_main = " ".join([
                street_num, street_dir, street_name, street_sfx, qualifier_pre
            ])
            address_main = address_main.strip()
            # get rid of any double spaces
            address_main = address_main.replace("  ", " ")

            apt_main = " ".join([qualifier_post, apt_num]).strip()

            address = address_main
            print(address)

            # conversions may rewrite an address, or blank it to mark the
            # row as one we want to skip
            lookup_key = address.upper()
            if lookup_key in conversions:
                address = conversions[lookup_key]

            if not address:
                print("SKIPPING ITEM: %s" % row[1])
                skips += 1
                # Stop here: without this the code below would run with no
                # location (or the previous row's location) and store it
                # under an empty address.
                continue

            address_key = address.upper()

            # check if we've started processing any results for this row
            if address_key in locations:
                location = locations[address_key]
            else:
                location = Location()

            # Geocoding is skipped for columbia; list sources here
            # (eg "google", "bing", "usgeo", "geonames", "openmq")
            # to turn remote lookups back on.
            location.sources = []

            search = "%s, %s, %s" % (address_key, city_name, city.state)

            any_updated = False
            for geo_source in location.sources:
                if geo.lookup(search, geo_source, location, force=True):
                    any_updated = True

            location.sources = [
                'csv', "google", "bing", "usgeo", "geonames", "openmq", "mq"
            ]

            # manually add the coordinates supplied by the csv:
            location.csv = [{'place': address, 'lat': lat, 'lng': lng}]

            # this is the case for brand new searches
            # (which are updated in a different sense)
            if not getattr(location, "address_alt", None):
                any_updated = True

            location.address_alt = search
            locations[address_key] = location

            # handle the database storage
            bldg = make_building(location,
                                 bldg_id,
                                 city,
                                 feed_source,
                                 no_units=no_units,
                                 bldg_type=bldg_type)

            if apt_main:
                make_unit(apt_main, bldg)

            make_person(owner_name, bldg, "Agent", address=owner_address)

            if any_updated:
                # back it up for later
                # (rewrites the whole cache file, which slows things down)
                local_cache['buildings'] = {}
                for key, value in locations.items():
                    local_cache['buildings'][key] = value.to_dict()
                save_json(cache_destination, local_cache)

            print("")

    destination = '%s.tsv' % city_tag
    save_results(locations, destination)

Example #11

Show file

File: convert-bloomington.py Project: enerscore/rentrocket

def read_csv(source_csv):
    """Load Bloomington building, owner and agent records from a CSV file.

    Each data row's address (optionally rewritten through the module-level
    ``conversions`` table) is geocoded through ``Geo.lookup`` using the
    "google" source, cached in a JSON file stored in the same directory as
    ``source_csv``, and persisted via ``make_building`` and ``make_person``.
    When all rows are handled the collected locations are passed to
    ``save_results`` as ``bloomington-filtered.tsv``.

    Args:
        source_csv: path of the CSV file to read (opened as UTF-8).

    Raises:
        ValueError: if no ``City`` tagged ``bloomington_in`` exists.
    """
    city_options = City.objects.filter(tag="bloomington_in")
    city_count = len(city_options)
    print("Number of cities available: %s" % city_count)
    if not city_count:
        raise ValueError("CITY NOT FOUND! run make_cities.py first")
    city = city_options[0]

    print(city)

    feed_date = "2013-08-29"

    # Reuse the feed / placeholder person / source rows when they exist,
    # otherwise create them.
    feeds = FeedInfo.objects.filter(city=city).filter(added=feed_date)
    if feeds.exists():
        feed = feeds[0]
        print("Already had feed: %s, %s" % (feed.city, feed.added))
    else:
        feed = FeedInfo()
        feed.city = city
        feed.added = feed_date
        feed.version = "0.1"
        feed.save()
        print("Created new feed: %s" % feed.city)

    people = Person.objects.filter(name="Blank")
    if people.exists():
        person = people[0]
        print("Already had person: %s" % (person.name))
    else:
        person = Person()
        person.name = "Blank"
        person.save()
        print("Created new person: %s" % person.name)

    sources = Source.objects.filter(feed=feed)
    if sources.exists():
        feed_source = sources[0]
        print("Already had source: %s, %s" % (feed_source.feed.city, feed_source.feed.added))
    else:
        feed_source = Source()
        feed_source.feed = feed
        feed_source.person = person
        feed_source.save()
        print("Created new source: %s" % feed_source.feed.city)

    cache_file = "%s.json" % city.tag
    cache_destination = os.path.join(os.path.dirname(source_csv), cache_file)
    # keep a local copy of data we've processed so subsequent runs do not
    # need to duplicate calls to remote geolocation APIs:
    local_cache = load_json(cache_destination, create=True)
    if "buildings" not in local_cache:
        local_cache["buildings"] = {}
    if "parcels" not in local_cache:
        local_cache["parcels"] = {}

    locations = {}
    for key, value in local_cache["buildings"].items():
        locations[key] = Location(value)

    # geocoder helper:
    geo = Geo()

    skips = 0
    with codecs.open(source_csv, "rb", encoding="utf-8") as csvfile:
        reader = csv.reader(csvfile)

        # just print the first (header) row:
        print(">, <".join(next(reader)))

        for count, row in enumerate(reader, 1):
            print("Looking at row: %s" % count)

            bldg_id = row[0]
            print(bldg_id)

            address = row[1]
            print(address)

            owner = row[2]
            # row[3] (owner contact) is not used
            agent = row[4]

            bldg_units = row[9]
            print(bldg_units)

            units_bdrms = row[10]
            print(units_bdrms)

            # conversions may rewrite an address, or blank it to mark the
            # row as one we want to skip
            lookup_key = address.upper()
            if lookup_key in conversions:
                address = conversions[lookup_key]

            if not address:
                print("SKIPPING ITEM: %s" % row[1])
                skips += 1
                continue

            address_key = address.upper()
            if address_key in locations:
                location = locations[address_key]
            else:
                location = Location()

            # temporarily just want to look at google again
            location.sources = ["google"]

            # do some geocoding, as needed:
            search = "%s, Bloomington IN" % address_key

            any_updated = False
            for geo_source in location.sources:
                if geo.lookup(search, geo_source, location, force=True):
                    any_updated = True

            location.sources = ["google", "bing", "usgeo", "geonames", "openmq", "mq"]

            # brand new searches count as updated too
            if not getattr(location, "address_alt", None):
                any_updated = True

            location.address_alt = search
            location.bldg_units = bldg_units
            location.units_bdrms = units_bdrms
            locations[address_key] = location

            # handle the database storage
            bldg = make_building(location, bldg_id, city, feed_source)

            if owner:
                special = special_cases(owner)
                if special:
                    (owner_name, owner_address) = special
                else:
                    (owner_name, owner_address, _phone, _rest) = parse_person(owner)

                # Reached from both branches: previously the special-case
                # name/address were assigned and then never stored.
                if owner_name:
                    make_person(owner_name, bldg, "Owner", address=owner_address)

            if agent and agent != "No Agent":
                (agent_name, agent_address, _phone, _rest) = parse_person(agent)

                if agent_name:
                    make_person(agent_name, bldg, "Agent", address=agent_address, city=city)

            if any_updated:
                # back it up for later
                # (rewrites the whole cache file, which slows things down)
                local_cache["buildings"] = {}
                for key, value in locations.items():
                    local_cache["buildings"][key] = value.to_dict()
                save_json(cache_destination, local_cache)

            print("")

    save_results(locations, "bloomington-filtered.tsv")

Example #12

Show file

File: convert-columbia.py Project: enerscore/rentrocket

def read_csv(source_csv, city_name, city_tag):
    city_options = City.objects.filter(tag=city_tag)
    print "Number of cities available: %s" % len(city_options)
    if not len(city_options):
        raise ValueError, "CITY NOT FOUND! run make_cities.py first"
        ## city = City()
        ## city.name = city_name
        ## city.tag = to_tag(city.name)
        ## city.save()
    else:
        city = city_options[0]

    print city

    feed_date = "2013-10-16"

    feeds = FeedInfo.objects.filter(city=city).filter(added=feed_date)
    if feeds.exists():
        feed = feeds[0]
        print "Already had feed: %s, %s" % (feed.city, feed.added)
    else:
        feed = FeedInfo()
        feed.city = city
        feed.added = feed_date
        feed.version = "0.1"
        feed.save()
        print "Created new feed: %s" % feed.city.name

    people = Person.objects.filter(name="Blank")
    if people.exists():
        person = people[0]
        print "Already had person: %s" % (person.name)
    else:
        person = Person()
        person.name = "Blank"
        person.save()
        print "Created new person: %s" % person.name

    sources = Source.objects.filter(feed=feed)
    if sources.exists():
        feed_source = sources[0]
        print "Already had source: %s, %s" % (feed_source.feed.city, feed_source.feed.added)
    else:
        feed_source = Source()
        feed_source.feed = feed
        feed_source.person = person
        feed_source.save()
        print "Created new source: %s" % feed_source.feed.city.name

    cache_file = "%s-20150525.json" % city.tag
    cache_destination = os.path.join(os.path.dirname(source_csv), cache_file)
    # keep a local copy of data we've processed...
    # this should help with subsequent calls
    # to make sure we don't need to duplicate calls to remote geolocation APIs:
    local_cache = load_json(cache_destination, create=True)
    if not local_cache.has_key("buildings"):
        local_cache["buildings"] = {}

    search_results = {}
    for key, value in local_cache["buildings"].items():
        # search_results[key] = Location(value)
        sr = SearchResults()
        sr.from_dict(value)
        # print
        # print sr
        # print
        search_results[key] = sr

    # geocoder helper:
    # geo = Geo()

    skips = 0
    with open(source_csv) as csvfile:

        reader = unicode_csv_reader(csvfile)

        # just print the first row:
        print ">, <".join(reader.next())

        count = 0

        # want to randomize the order... distribute options more evenly
        # print len(reader)
        # exit()
        # in order to randomize, should randomize the order in the csv
        for row in reader:
            count += 1
            print "Looking at row: %s" % count

            any_updated = False

            # could exit out early here, if needed
            if count > 10:
                # exit()
                pass

            # if you want to skip ahead more quickly:
            if count < 27187:
                pass
            else:

                # print row
                objectid = row[0]

                ## no_units = row[12]

                # can pass this in as bldg_id to make_building
                # that gets used for parcel too
                parcel_id = row[1]
                bldg_id = parcel_id

                street_num = row[2]
                street_dir = row[3]
                street_name = row[4]
                street_sfx = row[5]
                # eg building number
                qualifier_pre = row[6]
                # eg "UNIT" or "APT"
                qualifier_post = row[7]
                apt_num = row[8]
                # skip row9 (in/out... whatever that means)
                zip_code = row[10]
                # skip row11, assessor id
                # skip row12, address num
                # skip row13, x
                # skip row14, y
                # xcoord == lng
                lng = row[15]
                lat = row[16]

                # entry floor number: (named 'z' in sheet)
                floor = row[17]

                # skip row18, strcid... not sure
                # skip row19, parent
                # skip row20, app_
                # skip row21, hteloc
                zone = row[22]
                bldg_type = row[23]
                # number of buildings
                bldg_num = row[24]
                no_units = row[25]

                # skip row[26], inspection type
                # skip row27, app number
                # skip row28, date received
                # skip row29, application type
                # skip row30, ownerid
                # skip row31, operator id
                # skip row32, agent_id
                # skip row33, mail to
                central_heat = row[34]
                if central_heat == "Y":
                    central_heat = True
                else:
                    central_heat = False

                # heat mechanism? heat mechanic??? not sure
                heat_mech = row[35]
                # skip row36, agent id (2)
                # skip row37, agent last name
                # skip row38 agent first name
                # skip row39 agent middle initial
                # skip row40, agent title
                # skip row41, business name

                # could be owner, could be agent
                owner_name = row[42]
                owner_address1 = row[43]
                owner_address2 = row[44]
                owner_city = row[45]
                owner_state = row[46]
                owner_zip = row[47]

                # address = " ".join([street_num, street_dir, street_name, street_sfx, qualifier_pre, qualifier_post, apt_num])

                # this is causing problems with lookups in google
                if (
                    qualifier_pre == "DUP"
                    or qualifier_pre == "DUPE"
                    or qualifier_pre == "2-Jan"
                    or qualifier_pre == "HM"
                    or qualifier_pre == "DWN"
                ):
                    qualifier_pre = ""

                address_main = " ".join([street_num, street_dir, street_name, street_sfx, qualifier_pre])
                address_main = address_main.strip()
                # get rid of any double spaces
                address_main = address_main.replace("  ", " ")

                # similar to conversions,
                # but there are too many of these to list there
                if re.search("HOLLY RIDGE LN", address_main):
                    address_main = address_main.replace("HOLLY RIDGE LN", "HOLLYRIDGE LN")
                if re.search("BERKSHIRE CT", address_main):
                    address_main = address_main.replace("BERKSHIRE CT", "BERKSHIRE")
                    # address_main = ''
                if re.search("CAMERON CT", address_main):
                    address_main = address_main.replace("CAMERON CT", "CAMERON")
                    # address_main = ''
                if re.search("ATHENS CT", address_main):
                    address_main = address_main.replace("ATHENS CT", "ATHENS")
                    # address_main = ''
                if re.search("LAMAR CT", address_main):
                    address_main = address_main.replace("LAMAR CT", "LAMAR")
                    # address_main = ''
                if re.search("MONITEAU CT", address_main):
                    address_main = address_main.replace("MONITEAU CT", "MONITEAU")
                    # address_main = ''
                if re.search("IMPERIAL CT", address_main):
                    address_main = ""
                if re.search("PERKINS DR", address_main):
                    address_main = ""
                if re.search("GRANITE OAKS CT", address_main):
                    address_main = ""

                # sometimes the 'BLDG' data is added in the wrong place
                # then it gets treated as a unit item
                # (but it's not *always* a unit item, so can't generalize it that way)
                if qualifier_post == "BLDG" or qualifier_post == "LOT":
                    address_main = " ".join([address_main, qualifier_post, apt_main])
                    address_main = address_main.strip()
                    apt_main = ""
                else:
                    apt_main = " ".join([qualifier_post, apt_num])
                    apt_main = apt_main.strip()

                # check if this is one we want to skip
                if conversions.has_key(address_main.upper()):
                    address_main = conversions[address_main.upper()]

                if address_main:
                    print "APT_MAIN: ", apt_main
                    address = ", ".join([address_main, apt_main])
                else:
                    address = ""

                owner_address = ", ".join([owner_address1, owner_address2, owner_city, owner_state, owner_zip])

                ## if (not status in ['EXPIRED', 'CLOSED']) and (permit_type in ['RENTAL']):

                print "Parcel ID:", parcel_id
                print address

                results = None

                # make sure it's not one we're skipping:
                if not address:
                    print "SKIPPING ITEM: %s" % row[1]
                    skips += 1

                    skipf = codecs.open("skips.txt", "a", encoding="utf-8")
                    original = " ".join([street_num, street_dir, street_name, street_sfx, qualifier_pre])
                    skipf.write(original)
                    skipf.write("\n")
                    skipf.close()

                else:
                    # check if we've started processing any results for this row
                    if search_results.has_key(address.upper()):
                        print "Already had building: %s" % address
                        results = search_results[address.upper()]
                        # print results
                    else:

                        addy = ", ".join([address_main, city.name, city.state])
                        addy += " " + zip_code
                        # addy += ", USA"
                        print addy

                        # toggle betweeen an actual google query
                        results = address_search(addy, apt_main)

                        # print dir(results)

                        if len(results.matches) > 1:
                            print results
                            for option in results.matches:
                                print "%s: %s, %s" % (option["place"], option["lat"], option["lng"])
                            print
                            print "Source Lat: %s, Lng: %s" % (lat, lng)
                            src_lat = int(float(lat) * 100)
                            src_lng = int(float(lng) * 100)

                            matched = False
                            for current in results.matches:
                                # current = results.matches[0]
                                print current["lat"]
                                print current["lng"]
                                # only want to look at the first 2 decimal places:
                                comp_lat = int(float(current["lat"]) * 100)
                                comp_lng = int(float(current["lng"]) * 100)
                                print comp_lat
                                print comp_lng

                                if (src_lat == comp_lat) and (src_lng == comp_lng):
                                    # results.matches = results.matches[:1]
                                    results.matches = [current]
                                    matched = True

                            if not matched:
                                print "DIDN'T MATCH!"
                                exit()

                        any_updated = True

                        # or just using results as specified in csv
                        # (THIS DOES NOT NORMALIZE THE ADDRESS VIA GOOGLE)
                        # results = SearchResults()
                        # results.unit_text = apt_main
                        # handle_place(results, addy, lat, lng, apt_main)

                    assert results
                    # print results

                    lookup_building_with_geo(results, make=True, parcel_id=parcel_id)
                    # print results
                    # current['results'] = results

                    # print results

                    if results.errors:
                        print results
                        raise ValueError, results.errors
                    else:

                        search_results[address.upper()] = results

                        bldg = results.building
                        assert bldg
                        unit = results.unit

                        # may be a case where the unit is blank
                        # and another unit with an number/letter was created earlier
                        # in that case, we won't be creating one here
                        # and the building will already exist...
                        # not necessarily an error though
                        # just redundant data
                        # assert unit

                        (person, bldg_person) = make_person(owner_name, bldg, "Agent", address=owner_address)

                    # time.sleep(1)

            if any_updated:
                # back it up for later
                # enable this when downloading GPS coordinates...
                # the rest of the time it slows things down
                local_cache["buildings"] = {}
                for key, value in search_results.items():
                    # search_results[key] = SearchResults().from_dict(value)
                    local_cache["buildings"][key] = value.to_dict()
                save_json(cache_destination, local_cache)

            print

Example #13

Show file

 def _get_field_map(cls):
     """
     Return metadata for each field defined on this model, to aid in
     serializing and exporting data.

     The result is an OrderedDict keyed by export field identifier, in the
     order the fields are declared below. Each value is a dict with:

     - 'sql': the SQL expression that selects the value,
     - 'label': the column label (a literal, or the name returned by
       ``get_spreadsheet_field_name`` on Source / AccessPoint),
     - 'serializer': the entry of ``cls.serializers`` applied to the value
       (always the 'identity' entry here).
     """
     identity = cls.serializers['identity']

     def date_sql(prefix):
         # Prefer the stored date column; when it is NULL fall back to the
         # timestamp column rendered as text. HH24 is used rather than HH12
         # because a 12-hour clock without an AM/PM marker makes morning and
         # afternoon times indistinguishable in the exported value.
         return (
             "COALESCE(source.{0}_date, "
             "to_char(source.{0}_timestamp, 'YYYY-MM-DD HH24:MI:SS'))"
         ).format(prefix)

     return OrderedDict([
         # Fields read from the source table.
         ('source_id', {
             'sql': 'source.uuid',
             'label': 'source:id:admin',
             'serializer': identity,
         }),
         ('source_title', {
             'sql': 'source.title',
             'label': Source.get_spreadsheet_field_name('title'),
             'serializer': identity,
         }),
         ('source_type', {
             'sql': 'source.type',
             'label': Source.get_spreadsheet_field_name('type'),
             'serializer': identity,
         }),
         ('source_author', {
             'sql': 'source.author',
             'label': Source.get_spreadsheet_field_name('author'),
             'serializer': identity,
         }),
         ('source_publication', {
             'sql': 'source.publication',
             'label': Source.get_spreadsheet_field_name('publication'),
             'serializer': identity,
         }),
         ('source_publication_country', {
             'sql': 'source.publication_country',
             'label':
             Source.get_spreadsheet_field_name('publication_country'),
             'serializer': identity,
         }),
         ('source_published_date', {
             'sql': date_sql('published'),
             'label': Source.get_spreadsheet_field_name('published_date'),
             'serializer': identity,
         }),
         ('source_created_date', {
             'sql': date_sql('created'),
             'label': Source.get_spreadsheet_field_name('created_date'),
             'serializer': identity,
         }),
         ('source_uploaded_date', {
             'sql': date_sql('uploaded'),
             'label': Source.get_spreadsheet_field_name('uploaded_date'),
             'serializer': identity,
         }),
         ('source_source_url', {
             'sql': 'source.source_url',
             'label': Source.get_spreadsheet_field_name('source_url'),
             'serializer': identity,
         }),
         # Fields read from the access_point table.
         ('access_point_id', {
             'sql': 'access_point.uuid',
             'label': 'source:access_point_id',
             'serializer': identity,
         }),
         ('access_point_type', {
             'sql': 'access_point.type',
             'label': AccessPoint.get_spreadsheet_field_name('type'),
             'serializer': identity,
         }),
         ('access_point_trigger', {
             'sql': 'access_point.trigger',
             'label': AccessPoint.get_spreadsheet_field_name('trigger'),
             'serializer': identity,
         }),
         ('access_point_accessed_on', {
             'sql': 'access_point.accessed_on',
             'label': AccessPoint.get_spreadsheet_field_name('accessed_on'),
             'serializer': identity,
         }),
         ('access_point_archive_url', {
             'sql': 'access_point.archive_url',
             'label': AccessPoint.get_spreadsheet_field_name('archive_url'),
             'serializer': identity,
         }),
     ])