def text_to_location(address_text, seed_location=None, allow_numberless=True):
    '''
    Attempts to resolve a raw text string into a Location using the Google
    Geocoding API.

    If a Location is supplied as seed_location, its completed fields are
    used as hints to help resolving. These hints are more forgiving than
    those used in the Resolve API. The seed's own address field has no
    bearing on the results because it is replaced by address_text.

    If allow_numberless is False, the resolved location must be more
    specific than a neighborhood.

    Returns None if resolution was not possible.
    '''
    # Start from a private copy of the seed (or a blank Location) so the
    # caller's object is never modified, then drop the raw text into the
    # address field and let resolve_location do the actual work.
    query = copy.deepcopy(seed_location) if seed_location else Location()
    query.address = address_text
    return resolve_location(query, allow_numberless)
def text_to_location(address_text, seed_location=None, allow_numberless=True):
    '''
    Attempts to resolve a raw text string into a Location using the Google
    Geocoding API.

    When seed_location is provided, the fields already filled in on it act
    as hints for the resolution. These hints are more forgiving than those
    used in the Resolve API. Any address already present on the seed is
    ignored, since address_text takes its place.

    With allow_numberless set to False, the resolved location must be more
    specific than a neighborhood.

    Returns None if resolution was not possible.
    '''
    # The lookup is expressed as a Location: a deep copy of the seed when
    # one is given (the caller's instance stays untouched), otherwise an
    # empty Location. The raw text always goes into the address field.
    candidate = copy.deepcopy(seed_location) if seed_location else Location()
    candidate.address = address_text
    return resolve_location(candidate, allow_numberless)
def _geocode_result_to_location(result):
    '''
    Builds a Location out of a geocoding result object.

    The text fields (street address, postal code, town, state, country) are
    read through the result's getter methods and stripped of surrounding
    whitespace. The first two items returned by get_geocoding() are used as
    latitude and longitude respectively.
    '''
    point = result.get_geocoding()
    street = result.get_street_address().strip()
    postal = result.get_postalcode().strip()
    town = result.get_town().strip()
    state = result.get_state().strip()
    country = result.get_country().strip()
    return Location(
        address=street,
        postcode=postal,
        town=town,
        state=state,
        country=country,
        latitude=point[0],
        longitude=point[1])
def _resolve_result_to_place(result):
    '''
    Builds a Place from a single Resolve result dict.

    The Place's Location always has its country set to 'US'. The 'address',
    'locality', 'region' and 'postcode' entries are whitespace-stripped
    (missing keys become empty strings); 'latitude' and 'longitude' are
    passed through as-is and are None when absent.

    Returns the Place only; no GUID is extracted here.
    '''
    def _text(key):
        # missing text entries default to an empty string
        return result.get(key, '').strip()

    location = Location(
        country='US',
        address=_text('address'),
        town=_text('locality'),
        state=_text('region'),
        postcode=_text('postcode'),
        latitude=result.get('latitude'),
        longitude=result.get('longitude'))
    return Place(name=result.get('name'), location=location)
def fbloc_to_loc(fbloc):
    '''
    Converts a dict of fields composing a Facebook location to a Location.

    Text entries are whitespace-stripped and default to an empty string
    when missing. A state value that is not exactly two characters long is
    looked up in state_name_to_abbrev; unknown names become ''.
    '''
    def _clean(key):
        return fbloc.get(key, '').strip()

    state = _clean('state')
    # State entry is often the full state name rather than the abbreviation
    if state and len(state) != 2:
        state = state_name_to_abbrev.get(state, '')

    # NOTE(review): the postal code is read from the 'postcode' key; confirm
    # that is the key callers supply (Facebook's own field may be 'zip').
    return Location(
        address=_clean('street'),
        town=_clean('city'),
        state=state,
        postcode=_clean('postcode'),
        latitude=fbloc.get('latitude'),
        longitude=fbloc.get('longitude'))
def _seeded_resolve(name=None, address=None, postcode=None, town=None, state=None):
    '''
    Resolves a Place built from the given fields layered over the seed.

    seed_location is taken from the surrounding scope. When it is truthy a
    deep copy of it is the starting Location, otherwise a blank Location is
    used. Every argument that is truthy overwrites the attribute of the same
    name on that Location. The result of resolve_place is returned.
    '''
    loc = copy.deepcopy(seed_location) if seed_location else Location()

    overrides = (
        ('name', name),
        ('address', address),
        ('postcode', postcode),
        ('town', town),
        ('state', state),
    )
    for field, value in overrides:
        # falsy values (None, '') leave the seeded value in place
        if value:
            setattr(loc, field, value)

    return resolve_place(Place(name=name, location=loc))
def _seeded_resolve(name=None, address=None, postcode=None, town=None, state=None):
    '''
    Runs resolve_place on a Place whose Location starts from the seed.

    The starting Location is a deep copy of seed_location (a name supplied
    by the surrounding scope) when that is truthy, or an empty Location
    otherwise. Each truthy argument then replaces the same-named attribute
    on the Location before the Place is resolved.
    '''
    if seed_location:
        base = copy.deepcopy(seed_location)
    else:
        base = Location()

    supplied = [
        ('name', name),
        ('address', address),
        ('postcode', postcode),
        ('town', town),
        ('state', state),
    ]
    # only truthy arguments override what the seed already carries
    for attr, value in supplied:
        if not value:
            continue
        setattr(base, attr, value)

    return resolve_place(Place(name=name, location=base))
def run(): in_filename = os.path.join(os.path.dirname(__file__), 'obid.csv') #clear all tables Location.objects.all().delete() PlaceMeta.objects.all().delete() Place.objects.all().delete() Organization.objects.all().delete() ExternalPlaceSource.objects.all().delete() FacebookPage.objects.all().delete() FacebookOrgRecord.objects.all().delete() gplaces_category_map = load_category_map('google_places') gp_hits, gp_misses = 0, 0 rows = OBIDRow.rows_from_csv(in_filename) # cycle through each row with a facebook reference and store a reference page_mgr = PageImportManager() fb_rows = [row for row in rows if row.fb_id] for row, info in zip( fb_rows, page_mgr.pull_page_info([row.fb_id for row in fb_rows])): if isinstance(info, dict): info.pop('metadata', None) # don't need to store metadata if it exists FacebookPage.objects.get_or_create( fb_id=info['id'], defaults=dict(pageinfo_json=json.dumps(info))) row.fb_id = info['id'] # ensure a numeric id else: print 'ERROR: Pulling fb page %s resulted in the following exception: "%s"' % ( str(row.fb_id), str(info)) row.fb_id = '' # cycle through all rows and store everything for i, row in enumerate(rows): if not row.place: print 'ERROR: no place for entry %d' % i # resolve the location location = resolve_location( Location(address=row.address, postcode='15213')) if location: # hack to get around Google Geocoding appending the unviersity onto all addresses if ( location.address.startswith('University') and not row.address.lower().startswith('univ') ) or \ ( location.address.startswith('Carnegie Mellon') and row.address.lower().startswith('carnegie mellon') ): location.address = ','.join(location.address.split(',')[1:]) try: # if exact match exists, use it instead of the newly found one location = Location.objects.get(address=location.address, postcode=location.postcode) except Location.DoesNotExist: location.save() else: print 'WARNING: Geocoding failed for entry %d ("%s")' % (i, row.place) diff_org = row.org != row.place org, 
place = None, None # import org # if the row has a fb id, we'll try to import the Org from Facebook # only import Org from Facebook if it's the same as the Place (fb id relates to place only) if row.fb_id and not diff_org: try: org = FacebookOrgRecord.objects.get(fb_id=row.fb_id) except FacebookOrgRecord.DoesNotExist: report = page_mgr.import_org(row.fb_id) if report.model_instance: org = report.model_instance else: print 'WARNING: Organization FB import failed for entry %d (fbid %s)' % ( i, str(row.fb_id)) if not org: org, created = Organization.objects.get_or_create(name=row.org) # import place if row.fb_id: try: place = ExternalPlaceSource.facebook.get(uid=row.fb_id) except ExternalPlaceSource.DoesNotExist: report = page_mgr.import_place(row.fb_id, import_owners=False) if report.model_instance: place = report.model_instance if not place.owner: # no owner is created automatically, so set it if not created place.owner = org place.save() else: print 'WARNING: Place FB import failed for entry %d (fbid %s)' % ( i, str(row.fb_id)) if not place: place, created = Place.objects.get_or_create(name=row.place, location=location, owner=org) if row.url: PlaceMeta.objects.create(place=place, meta_key='url', meta_value=row.url) if not diff_org: # also save the url as the org's url if they're the same org.url = row.url org.save() if row.phone: PlaceMeta.objects.create(place=place, meta_key='phone', meta_value=row.phone) print 'Imported %s' % row.place try: print ' (linked to FB page %s)' % ExternalPlaceSource.facebook.get( place=place).uid except ExternalPlaceSource.DoesNotExist: pass # store tags from Google Place lookup if location and \ location.latitude is not None and location.longitude is not None: coords = (location.latitude, location.longitude) radius = 1000 else: coords = (40.4425, -79.9575) radius = 5000 response = gplaces_client.search_request(coords, radius, keyword=row.place) if len(response) > 0 and 'reference' in response[0]: details = 
gplaces_client.details_request(response[0]['reference']) all_tags = set() for typ in details.get('types', []): if typ in gplaces_category_map: all_tags.update(gplaces_category_map[typ]) else: print 'WARNING: Unknown Google Places type: "%s"' % typ if len(all_tags) > 0: print ' Tags:', for t in all_tags: print '%s,' % t, print gp_hits += 1 else: print ' WARNING: Failure querying Google Places for "%s" within %dm of (%f,%f)' % ( row.place, radius, coords[0], coords[1]) gp_misses += 1 print gp_hits, gp_misses