def list_pages(self):
    """generate page strings."""
    # XXX argh we apparently need the api_secret, and thus the token / frob dance?
    # even though this method doesn't need authentication???
    flickr = flickrapi.FlickrAPI(self.api_key)
    extent = ','.join([str(coord) for coord in get_default_bounds().extent])
    # Result of each iteration is a JSON string.
    pagenum = 0
    pages = float('inf')
    while pagenum < pages:
        pagenum += 1
        page = flickr.photos_search(
            user_id='76787473@N03',
            page=str(pagenum),
            safe_search='1',
            format='json',
            content_type='1',
            nojsoncallback='1',
            extras='date_taken,date_upload,url_m,description,geo,owner_name')
        # sets = flickr.photosets_getList(user_id='76787473@N03')
        # print sets
        # Ugh, we need to find out how many pages there are, so we parse here
        # and also in parse_list().
        adict = simplejson.loads(page)
        # general debugging
        try:
            pages = int(adict['photos']['pages'])
            print pages
        except KeyError:
            self.logger.error("Page content:\n%s" % page)
            raise RuntimeError("Parsing error, missing 'photos' or 'pages', see above.")
        yield page
def list_pages(self):
    """generate page strings."""
    # XXX argh we apparently need the api_secret, and thus the token / frob dance?
    # even though this method doesn't need authentication???
    flickr = flickrapi.FlickrAPI(self.api_key, self.api_secret)
    extent = ','.join([str(coord) for coord in get_default_bounds().extent])
    # Result of each iteration is a JSON string.
    pagenum = 0
    pages = float('inf')
    while pagenum < pages:
        pagenum += 1
        page = flickr.photos_search(
            has_geo=1,
            bbox=extent,
            safe_search='1',
            min_taken_date=None,  # XXX unix timestamp
            max_taken_date=None,  # XXX timestamp
            per_page='400',
            page=str(pagenum),
            extras='date_taken,date_upload,url_sq,description,geo,owner_name',
            format='json',
            content_type='1',  # photos only.
            nojsoncallback='1',
            )
        # Ugh, we need to find out how many pages there are, so we parse here
        # and also in parse_list().
        adict = simplejson.loads(page)
        try:
            pages = int(adict['photos']['pages'])
        except KeyError:
            self.logger.error("Page content:\n%s" % page)
            raise StopScraping("Parsing error, missing 'photos' or 'pages', see above.")
        yield page
def handle(self, *args, **options):
    # First we download a bunch of zipfiles of TIGER data.
    HERE = os.getcwd()
    print "Working directory is", HERE
    OUTDIR = os.path.join(HERE, 'tiger_data')
    BASEURL = 'ftp://ftp2.census.gov/geo/tiger/TIGER2010'
    # 25 = MA; 25025 = Suffolk County, MA.
    ZIPS = ("PLACE/2010/tl_2010_25_place10.zip",
            "EDGES/tl_2010_25025_edges.zip",
            "FACES/tl_2010_25025_faces.zip",
            "FEATNAMES/tl_2010_25025_featnames.zip",
            )
    makedirs(OUTDIR) or die("couldn't create directory %s" % OUTDIR)
    for fname in ZIPS:
        wget('%s/%s' % (BASEURL, fname), cwd=OUTDIR) or die(
            "Could not download %s/%s" % (BASEURL, fname))
    import glob
    for fname in glob.glob(os.path.join(OUTDIR, '*zip')):
        unzip(fname, cwd=OUTDIR) or die("Could not unzip %s" % fname)
    print "Shapefiles unzipped in %s" % OUTDIR

    # Now we load them into our blocks table.
    from ebpub.streets.blockimport.tiger import import_blocks
    from ebpub.utils.geodjango import get_default_bounds
    print "Importing blocks, this may take several minutes ..."
    # Passing --city means we skip features labeled for other cities.
    importer = import_blocks.TigerImporter(
        '%s/tl_2010_25025_edges.shp' % OUTDIR,
        '%s/tl_2010_25025_featnames.dbf' % OUTDIR,
        '%s/tl_2010_25025_faces.dbf' % OUTDIR,
        '%s/tl_2010_25_place10.shp' % OUTDIR,
        encoding='utf8',
        filter_bounds=get_default_bounds(),
        filter_city='BOSTON',
        reset=options['reset'])
    num_created, num_existing = importer.save()
    print "Created %d blocks (%d existing)" % (num_created, num_existing)

    #########################

    print "Populating streets and fixing addresses, these can take several minutes..."
    # cd $SOURCE_ROOT/ebpub/ebpub/streets/bin || die
    # Note these scripts should be run ONCE, in this order,
    # after you have imported *all* your blocks.
    from ebpub.streets.bin import populate_streets
    populate_streets.main(['-v', '-v', '-v', '-v', 'streets'])
    populate_streets.main(['-v', '-v', '-v', '-v', 'block_intersections'])
    populate_streets.main(['-v', '-v', '-v', '-v', 'intersections'])
    print "Done."
def handle(self, *args, **options):
    # First we download a bunch of zipfiles of TIGER data.
    HERE = os.getcwd()
    print "Working directory is", HERE
    OUTDIR = os.path.join(HERE, 'tiger_data')
    BASEURL = 'ftp://ftp2.census.gov/geo/tiger/TIGER2010'
    # 25 = MA; 25025 = Suffolk County, MA.
    ZIPS = ("PLACE/2010/tl_2010_25_place10.zip",
            "EDGES/tl_2010_25025_edges.zip",
            "FACES/tl_2010_25025_faces.zip",
            "FEATNAMES/tl_2010_25025_featnames.zip",
            )
    makedirs(OUTDIR) or die("couldn't create directory %s" % OUTDIR)
    for fname in ZIPS:
        wget('%s/%s' % (BASEURL, fname), cwd=OUTDIR) or die(
            "Could not download %s/%s" % (BASEURL, fname))
    import glob
    for fname in glob.glob(os.path.join(OUTDIR, '*zip')):
        unzip(fname, cwd=OUTDIR) or die("Could not unzip %s" % fname)
    print "Shapefiles unzipped in %s" % OUTDIR

    # Now we load them into our blocks table.
    from ebpub.streets.blockimport.tiger import import_blocks
    from ebpub.utils.geodjango import get_default_bounds
    print "Importing blocks, this may take several minutes ..."
    # Passing --city means we skip features labeled for other cities.
    importer = import_blocks.TigerImporter(
        '%s/tl_2010_25025_edges.shp' % OUTDIR,
        '%s/tl_2010_25025_featnames.dbf' % OUTDIR,
        '%s/tl_2010_25025_faces.dbf' % OUTDIR,
        '%s/tl_2010_25_place10.shp' % OUTDIR,
        encoding='utf8',
        filter_bounds=get_default_bounds(),
        filter_city='BOSTON')
    num_created = importer.save()
    print "Created %d blocks" % num_created

    #########################

    print "Populating streets and fixing addresses, these can take several minutes..."
    # cd $SOURCE_ROOT/ebpub/ebpub/streets/bin || die
    # Note these scripts should be run ONCE, in this order,
    # after you have imported *all* your blocks.
    from ebpub.streets.bin import populate_streets
    populate_streets.main(['-v', '-v', '-v', '-v', 'streets'])
    populate_streets.main(['-v', '-v', '-v', '-v', 'block_intersections'])
    populate_streets.main(['-v', '-v', '-v', '-v', 'intersections'])
    print "Done."
def __init__(self, layer, location_type, source='UNKNOWN',
             filter_bounds=False, verbose=False):
    self.layer = layer
    metro = get_metro()
    self.metro_name = metro['metro_name'].upper()
    self.now = datetime.datetime.now()
    self.location_type = location_type
    self.source = source
    self.filter_bounds = filter_bounds
    self.verbose = verbose
    if self.filter_bounds:
        from ebpub.utils.geodjango import get_default_bounds
        self.bounds = get_default_bounds()
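# Hedged construction sketch (not from the source): the class name
# ``LocationImporter`` and the LocationType lookup are assumptions for
# illustration; only the keyword arguments come from the __init__ above.
# With filter_bounds=True, self.bounds is presumably used later (e.g. in
# save()) to skip features outside the metro extent.
from django.contrib.gis.gdal import DataSource
from ebpub.db.models import LocationType

ds = DataSource('/path/to/neighborhoods.shp')  # hypothetical shapefile path
importer = LocationImporter(
    layer=ds[0],
    location_type=LocationType.objects.get(slug='neighborhoods'),
    source='city GIS department',
    filter_bounds=True,
    verbose=True)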
def __init__(self, api_url, api_key=None, jurisdiction_id=None,
             schema_slug='open311-service-requests', http_cache=None,
             seconds_between_requests=2.0, days_prior=90, timeout=60,
             bounds=None, html_url_template=None):
    """
    If ``bounds`` is passed, it should be a geometry; news items that
    don't intersect with that geometry will be skipped.  Default bounds
    is the extent defined in settings.METRO_LIST.

    If ``html_url_template`` is given, the service_request id is
    interpolated into the string to form the news item's URL, e.g.
    http://somewhere/%s.html.  This is not really part of the GeoReport
    v2 API, but in some cases, like SeeClickFix, there is a well-known
    location based on the identifier for an item.
    """
    self.api_url = api_url
    if not self.api_url.endswith('/'):
        self.api_url += '/'
    self.days_prior = days_prior
    self.seconds_between_requests = seconds_between_requests
    self.schema_slug = schema_slug
    self.schema = Schema.objects.get(slug=self.schema_slug)
    self.service_request_id_field = SchemaField.objects.get(
        schema=self.schema, name='service_request_id')
    self.standard_params = {}
    if api_key is not None:
        self.standard_params['api_key'] = api_key
    if jurisdiction_id is not None:
        self.standard_params['jurisdiction_id'] = jurisdiction_id
    self.http = Http(http_cache, timeout=timeout)
    self.bounds = bounds
    if bounds is None:
        log.info("Calculating geographic boundaries from the extent "
                 "in settings.METRO_LIST")
        self.bounds = get_default_bounds()
    try:
        # Make sure it's a geos geometry, not an ogr/gdal geometry,
        # so we can test for intersecting geos Points.
        self.bounds = self.bounds.geos
    except AttributeError:
        pass
    self.html_url_template = html_url_template
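# Hedged usage sketch (assumption: the class above is ebdata's
# GeoReportV2Scraper or similar, and the endpoint URL is a placeholder).
# Bounds are left as None so they default to the settings.METRO_LIST extent.
scraper = GeoReportV2Scraper(
    api_url='http://example.com/open311/v2/',
    api_key=None,                  # not all endpoints require a key
    jurisdiction_id=None,
    days_prior=30,                 # only fetch the last month of requests
    seconds_between_requests=2.0,  # throttle to be polite to the server
    html_url_template='http://example.com/issues/%s')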
def main(dry_run=True):
    items_outside = list(
        NewsItem.objects.exclude(location__intersects=get_default_bounds()))
    print "Items outside bounds: %s" % len(items_outside)
    for item in items_outside:
        fix_newsitem_coords(item, dry_run)
        print "-" * 60

    items_no_loc_name = list(NewsItem.objects.filter(location_name=''))
    print
    print "=" * 60
    print "Items with no location name: %s" % len(items_no_loc_name)
    for item in items_no_loc_name:
        fix_newsitem_loc_name(item, dry_run)
        print "-" * 60
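# Usage sketch (an assumption; the surrounding script's entry point isn't
# shown): run once in dry-run mode to see what would change, then with
# dry_run=False to actually save the fixes.
if __name__ == '__main__':
    main(dry_run=True)
    # main(dry_run=False)  # uncomment to commit the fixes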
def list_pages(self):
    """generate page strings."""
    # XXX argh we apparently need the api_secret, and thus the token / frob dance?
    # even though this method doesn't need authentication???
    flickr = flickrapi.FlickrAPI(self.api_key, self.api_secret)
    extent = ','.join([str(coord) for coord in get_default_bounds().extent])
    # Result of each iteration is a JSON string.
    pagenum = 0
    pages = float('inf')
    while pagenum < pages:
        pagenum += 1
        page = flickr.photos_search(
            has_geo=1,
            bbox=extent,
            safe_search='1',
            min_taken_date=self.min_timestamp,
            max_taken_date=self.max_timestamp,
            per_page='400',
            page=str(pagenum),
            extras='date_taken,date_upload,url_sq,description,geo,owner_name',
            format='json',
            content_type='1',  # photos only.
            nojsoncallback='1',
            )
        # Ugh, we need to find out how many pages there are, so we parse here
        # and also in parse_list().
        adict = simplejson.loads(page)
        try:
            pages = int(adict['photos']['pages'])
        except KeyError:
            if adict.get('stat') == 'fail':
                self.logger.error("Flickr error code %r: %s"
                                  % (adict['code'], adict['message']))
            else:
                self.logger.error("Page content:\n%s" % page)
            raise StopScraping(
                "Parsing error, missing 'photos' or 'pages', see above.")
        yield page
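# Minimal consumption sketch (an assumption, not from the source): each
# yielded page is a JSON string with Flickr's standard envelope, so a
# caller can tally geotagged photos like this.  ``scraper`` stands in for
# an instance of the class that defines list_pages() above.
import simplejson

def count_photos(scraper):
    total = 0
    for page in scraper.list_pages():
        adict = simplejson.loads(page)
        total += len(adict['photos']['photo'])
    return total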
def handle(self, county, **options):
    # First we download a bunch of zipfiles of TIGER data.
    if options['dir']:
        TMP = options['dir']
        download = not os.path.exists(TMP)
        if download:
            os.makedirs(TMP)
    else:
        TMP = tempfile.mkdtemp()
        download = True
    os.chdir(TMP)
    OUTDIR = os.path.join(TMP, 'tiger_data')
    STATE = '37'  # NC
    if download:
        print 'Download TIGER data to %s' % TMP
        BASEURL = 'ftp://ftp2.census.gov/geo/tiger/TIGER2010'
        ZIPS = ("PLACE/2010/tl_2010_%s_place10.zip" % STATE,
                "EDGES/tl_2010_%s_edges.zip" % county,
                "FACES/tl_2010_%s_faces.zip" % county,
                "FEATNAMES/tl_2010_%s_featnames.zip" % county,
                )
        makedirs(OUTDIR) or die("couldn't create directory %s" % OUTDIR)
        for fname in ZIPS:
            wget('%s/%s' % (BASEURL, fname), cwd=OUTDIR) or die(
                "Could not download %s/%s" % (BASEURL, fname))
        import glob
        for fname in glob.glob(os.path.join(OUTDIR, '*zip')):
            unzip(fname, cwd=OUTDIR) or die("Could not unzip %s" % fname)
        print "Shapefiles unzipped in %s" % OUTDIR

    # Now we load them into our blocks table.
    from ebpub.streets.blockimport.tiger import import_blocks
    from ebpub.utils.geodjango import get_default_bounds
    print "Importing blocks, this may take several minutes ..."
    # Passing --city means we skip features labeled for other cities.
    importer = import_blocks.TigerImporter(
        '%s/tl_2010_%s_edges.shp' % (OUTDIR, county),
        '%s/tl_2010_%s_featnames.dbf' % (OUTDIR, county),
        '%s/tl_2010_%s_faces.dbf' % (OUTDIR, county),
        '%s/tl_2010_%s_place10.shp' % (OUTDIR, STATE),
        encoding='utf8',
        filter_bounds=get_default_bounds())
    num_created = importer.save()
    print "Created %d blocks" % num_created

    #########################

    print "Populating streets and fixing addresses, these can take several minutes..."
    # Note these scripts should be run ONCE, in this order,
    # after you have imported *all* your blocks.
    from ebpub.streets.bin import populate_streets
    populate_streets.main(['streets'])
    populate_streets.main(['block_intersections'])
    populate_streets.main(['intersections'])
    print "Done."

    if not options['dir']:
        print "Removing temp directory %s" % TMP
        os.system('rm -rf %s' % TMP)
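# Invocation sketch: the actual command name is an assumption -- it comes
# from this file's name under management/commands/.  The positional argument
# is a 5-digit state+county FIPS code, e.g.:
#
#   django-admin.py import_nc_blocks 37063 --dir=/tmp/tiger_cache
#
# where 37063 is Durham County, NC, and --dir keeps the downloads around
# for re-runs instead of using a throwaway temp directory.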
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]
    parser = optparse.OptionParser(
        usage='%prog edges.shp featnames.dbf faces.dbf place.shp')
    parser.add_option('-v', '--verbose', action='store_true',
                      dest='verbose', default=False)
    parser.add_option('-c', '--city', dest='city',
                      help='A city name to filter against')
    parser.add_option(
        '-f', '--fix-cities', action="store_true", default=False,
        help='Whether to override "city" attribute of blocks and '
        'streets by finding an intersecting Location of a city-ish '
        'type. Only makes sense if you have configured '
        'multiple_cities=True in the METRO_LIST of your settings.py, '
        'and after you have created some appropriate Locations.')
    parser.add_option(
        '-b', '--filter-bounds', action="store", default=1, type='int',
        help='Whether to skip blocks outside the metro extent from your '
        'settings.py. Default 1 (true); use 0 to disable.')
    parser.add_option(
        '-l', '--filter-location', action="append",
        help='A location (spelled as location-type-slug:location-slug) '
        'that will be used to filter out blocks outside its boundaries. '
        'May be passed more than once.')
    parser.add_option('-e', '--encoding', dest='encoding',
                      help='Encoding to use when reading the shapefile',
                      default='utf8')
    (options, args) = parser.parse_args(argv)

    if len(args) != 4:
        return parser.error('must provide 4 arguments, see usage')

    if options.filter_bounds:
        from ebpub.utils.geodjango import get_default_bounds
        filter_bounds = get_default_bounds()
    else:
        filter_bounds = None

    # Optionally filter on bounds of some Locations too.
    loc_bounds = None
    for locslug in options.filter_location or []:
        typeslug, locslug = locslug.split(':', 1)
        from ebpub.db.models import Location
        location = Location.objects.get(location_type__slug=typeslug,
                                        slug=locslug)
        if loc_bounds is None:
            loc_bounds = location.location
        else:
            loc_bounds = loc_bounds.union(location.location)

    if None not in (filter_bounds, loc_bounds):
        filter_bounds = filter_bounds.intersection(loc_bounds)
    elif loc_bounds is not None:
        filter_bounds = loc_bounds
    # else: leave filter_bounds as-is (possibly None).

    tiger = TigerImporter(*args,
                          verbose=options.verbose,
                          filter_city=options.city,
                          filter_bounds=filter_bounds,
                          encoding=options.encoding,
                          fix_cities=options.fix_cities)
    if options.verbose:
        import logging
        logger.setLevel(logging.DEBUG)
    num_created = tiger.save()
    logger.info("Created %d new blocks" % num_created)
    logger.debug("... from %d feature names" % len(tiger.featnames_db))
    logger.debug("feature tlids with blocks: %d" % len(tiger.tlids_with_blocks))
    import pprint
    tlids_wo_blocks = set(tiger.featnames_db.keys()).difference(
        tiger.tlids_with_blocks)
    logger.debug("feature tlids WITHOUT blocks: %d" % len(tlids_wo_blocks))
    all_rows = []
    for t in tlids_wo_blocks:
        all_rows.extend(tiger.featnames_db[t])
    logger.debug("Rows: %d" % len(all_rows))
    names = [(r['FULLNAME'], r['TLID']) for r in all_rows]
    names.sort()
    logger.debug("=================")
    for n, t in names:
        logger.debug("%s %s" % (n, t))
    for tlid in sorted(tlids_wo_blocks)[:10]:
        feat = tiger.featnames_db[tlid]
        logger.debug(pprint.pformat(feat))
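# Example invocation (paths are placeholders, and the script name is an
# assumption), matching the usage string above -- the four positional
# arguments must be edges.shp, featnames.dbf, faces.dbf, and place.shp,
# in that order:
#
#   python import_blocks.py -v --city BOSTON \
#       tiger_data/tl_2010_25025_edges.shp \
#       tiger_data/tl_2010_25025_featnames.dbf \
#       tiger_data/tl_2010_25025_faces.dbf \
#       tiger_data/tl_2010_25_place10.shp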