def list_pages(self):
    """Generate Flickr search-result pages as JSON strings.

    Yields one raw JSON page per iteration.  Each page is parsed just
    enough to learn the total page count so the loop knows when to stop;
    parse_list() parses the same payload again downstream.

    Raises RuntimeError if a response is missing the 'photos'/'pages'
    keys (the offending page is logged first).
    """
    # XXX argh we apparently need the api_secret, and thus the token / frob dance?
    # even though this method doesn't need authentication???
    flickr = flickrapi.FlickrAPI(self.api_key)
    extent = ','.join([str(coord) for coord in get_default_bounds().extent])

    # Result of each iteration is a JSON string.
    pagenum = 0
    pages = float('inf')  # unknown until the first response is parsed
    while pagenum < pages:
        pagenum += 1
        page = flickr.photos_search(user_id='76787473@N03',
                                    page=str(pagenum),
                                    safe_search='1',
                                    format='json',
                                    content_type='1',
                                    nojsoncallback='1',
                                    extras='date_taken,date_upload,url_m,description,geo,owner_name')
        # Ugh, we need to find out how many pages there are, so we parse here
        # and also in parse_list().
        adict = simplejson.loads(page)
        try:
            pages = int(adict['photos']['pages'])
        except KeyError:
            self.logger.error("Page content:\n%s" % page)
            raise RuntimeError("Parsing error, missing 'photos' or 'pages', see above.")
        yield page
Example #2
0
    def list_pages(self):
        """Yield the geo-bounded Flickr search results, one JSON page string at a time."""

        # XXX argh we apparently need the api_secret, and thus the token / frob dance?
        # even though this method doesn't need authentication???
        flickr = flickrapi.FlickrAPI(self.api_key, self.api_secret)
        search_bbox = ','.join(str(coord) for coord in get_default_bounds().extent)

        # Each iteration produces one raw JSON page.
        total_pages = float('inf')  # learned from the first parsed response
        current_page = 0
        while current_page < total_pages:
            current_page += 1
            page = flickr.photos_search(
                has_geo=1,
                bbox=search_bbox,
                safe_search='1',
                min_taken_date=None,  # XXX unix timestamp
                max_taken_date=None,  # XXX timestamp
                per_page='400',
                page=str(current_page),
                extras='date_taken,date_upload,url_sq,description,geo,owner_name',
                format='json',
                content_type='1',  # photos only.
                nojsoncallback='1')

            # The total page count only exists inside the payload, so we
            # parse it here even though parse_list() parses it again.
            adict = simplejson.loads(page)
            try:
                total_pages = int(adict['photos']['pages'])
            except KeyError:
                self.logger.error("Page content:\n%s" % page)
                raise StopScraping("Parsing error, missing 'photos' or 'pages', see above.")
            yield page
    def handle(self, *args, **options):
        """Download TIGER 2010 shapefiles for Suffolk County, MA, import
        them as blocks (resettable via options['reset']), then populate
        streets and intersections.

        Side effects: creates ./tiger_data under the current working
        directory, downloads/unzips census data there, and writes to the
        blocks/streets tables.
        """
        # First we download a bunch of zipfiles of TIGER data.
        HERE = os.getcwd()
        print "Working directory is", HERE
        OUTDIR = os.path.join(HERE, 'tiger_data')
        BASEURL='ftp://ftp2.census.gov/geo/tiger/TIGER2010'
        # 25 = MA; 25025 = Suffolk County, MA.
        ZIPS = ("PLACE/2010/tl_2010_25_place10.zip",
                "EDGES/tl_2010_25025_edges.zip",
                "FACES/tl_2010_25025_faces.zip",
                "FEATNAMES/tl_2010_25025_featnames.zip"
                )
        # makedirs/wget/unzip/die are helpers defined elsewhere in this
        # module; the `x or die(...)` idiom implies they return truthy on
        # success — confirm against their definitions.
        makedirs(OUTDIR) or die("couldn't create directory %s" % OUTDIR)
        for fname in ZIPS:
            wget('%s/%s' % (BASEURL, fname), cwd=OUTDIR) or die(
                "Could not download %s/%s" % (BASEURL, fname))

        import glob
        for fname in glob.glob(os.path.join(OUTDIR, '*zip')):
            unzip(fname, cwd=OUTDIR) or die("Could not unzip %s" % fname)
        print "Shapefiles unzipped in %s" % OUTDIR

        # Now we load them into our blocks table.
        from ebpub.streets.blockimport.tiger import import_blocks
        from ebpub.utils.geodjango import get_default_bounds
        print "Importing blocks, this may take several minutes ..."

        # Passing --city means we skip features labeled for other cities.

        importer = import_blocks.TigerImporter(
            '%s/tl_2010_25025_edges.shp' % OUTDIR,
            '%s/tl_2010_25025_featnames.dbf' % OUTDIR,
            '%s/tl_2010_25025_faces.dbf' % OUTDIR,
            '%s/tl_2010_25_place10.shp' % OUTDIR,
            encoding='utf8',
            filter_bounds=get_default_bounds(),
            filter_city='BOSTON',
            reset=options['reset'])
        num_created, num_existing = importer.save()
        print "Created %d blocks (%d existing)" % (num_created, num_existing)

        #########################

        print "Populating streets and fixing addresses, these can take several minutes..."

        #cd $SOURCE_ROOT/ebpub/ebpub/streets/bin || die

        # Note these scripts should be run ONCE, in this order,
        # after you have imported *all* your blocks.

        from ebpub.streets.bin import populate_streets
        populate_streets.main(['-v', '-v', '-v', '-v', 'streets'])
        populate_streets.main(['-v', '-v', '-v', '-v', 'block_intersections'])
        populate_streets.main(['-v', '-v', '-v', '-v', 'intersections'])
        print "Done."
    def handle(self, *args, **options):
        """Download TIGER 2010 shapefiles for Suffolk County, MA, import
        them as blocks, then populate streets and intersections.

        Unlike the resettable variant of this command, this one passes no
        ``reset`` option and ``importer.save()`` returns only the number
        of blocks created.
        """
        # First we download a bunch of zipfiles of TIGER data.
        HERE = os.getcwd()
        print "Working directory is", HERE
        OUTDIR = os.path.join(HERE, 'tiger_data')
        BASEURL='ftp://ftp2.census.gov/geo/tiger/TIGER2010'
        # 25 = MA; 25025 = Suffolk County, MA.
        ZIPS = ("PLACE/2010/tl_2010_25_place10.zip",
                "EDGES/tl_2010_25025_edges.zip",
                "FACES/tl_2010_25025_faces.zip",
                "FEATNAMES/tl_2010_25025_featnames.zip"
                )
        # makedirs/wget/unzip/die are module-level helpers; the
        # `x or die(...)` idiom implies truthy-on-success returns.
        makedirs(OUTDIR) or die("couldn't create directory %s" % OUTDIR)
        for fname in ZIPS:
            wget('%s/%s' % (BASEURL, fname), cwd=OUTDIR) or die(
                "Could not download %s/%s" % (BASEURL, fname))

        import glob
        for fname in glob.glob(os.path.join(OUTDIR, '*zip')):
            unzip(fname, cwd=OUTDIR) or die("Could not unzip %s" % fname)
        print "Shapefiles unzipped in %s" % OUTDIR

        # Now we load them into our blocks table.
        from ebpub.streets.blockimport.tiger import import_blocks
        from ebpub.utils.geodjango import get_default_bounds
        print "Importing blocks, this may take several minutes ..."

        # Passing --city means we skip features labeled for other cities.

        importer = import_blocks.TigerImporter(
            '%s/tl_2010_25025_edges.shp' % OUTDIR,
            '%s/tl_2010_25025_featnames.dbf' % OUTDIR,
            '%s/tl_2010_25025_faces.dbf' % OUTDIR,
            '%s/tl_2010_25_place10.shp' % OUTDIR,
            encoding='utf8',
            filter_bounds=get_default_bounds(),
            filter_city='BOSTON')
        num_created = importer.save()
        print "Created %d blocks" % num_created

        #########################

        print "Populating streets and fixing addresses, these can take several minutes..."

        #cd $SOURCE_ROOT/ebpub/ebpub/streets/bin || die

        # Note these scripts should be run ONCE, in this order,
        # after you have imported *all* your blocks.

        from ebpub.streets.bin import populate_streets
        populate_streets.main(['-v', '-v', '-v', '-v', 'streets'])
        populate_streets.main(['-v', '-v', '-v', '-v', 'block_intersections'])
        populate_streets.main(['-v', '-v', '-v', '-v', 'intersections'])
        print "Done."
Example #5
0
 def __init__(self, layer, location_type, source='UNKNOWN', filter_bounds=False, verbose=False):
     """Capture importer configuration from the given layer.

     When ``filter_bounds`` is true, the metro's default extent is looked
     up once here and cached on ``self.bounds``.
     """
     metro_config = get_metro()
     self.layer = layer
     self.location_type = location_type
     self.source = source
     self.verbose = verbose
     self.metro_name = metro_config['metro_name'].upper()
     self.now = datetime.datetime.now()
     self.filter_bounds = filter_bounds
     if filter_bounds:
         from ebpub.utils.geodjango import get_default_bounds
         self.bounds = get_default_bounds()
Example #6
0
    def __init__(self,
                 api_url,
                 api_key=None,
                 jurisdiction_id=None,
                 schema_slug='open311-service-requests',
                 http_cache=None,
                 seconds_between_requests=2.0,
                 days_prior=90,
                 timeout=60,
                 bounds=None,
                 html_url_template=None):
        """
        If ``bounds`` is passed, it should be a geometry; news items
        that don't intersect with that geometry will be skipped.
        Default bounds is the extent defined in settings.METRO_LIST.

        If ``html_url_template`` is given, the service_request id is
        replaced into the string to form the news item's url, e.g.
        http://somewhere/%s.html.  This is not really part of the
        GeoReport v2 API, but in some cases, like SeeClickFix, there is a
        well known location based on the identifier for an item.
        """
        self.api_url = api_url
        # Request URLs are built by appending to the base, so normalize
        # it to end with exactly one slash.
        if not self.api_url.endswith('/'):
            self.api_url += '/'

        self.days_prior = days_prior
        self.seconds_between_requests = seconds_between_requests
        self.schema_slug = schema_slug
        self.schema = Schema.objects.get(slug=self.schema_slug)
        self.service_request_id_field = SchemaField.objects.get(
            schema=self.schema, name='service_request_id')

        # Query parameters sent along with every API request.
        self.standard_params = {}
        if api_key is not None:
            self.standard_params['api_key'] = api_key
        if jurisdiction_id is not None:
            # BUG FIX: was assigned to the misspelled ``standard_parms``,
            # which raised AttributeError whenever a jurisdiction_id was
            # supplied (and never sent the parameter).
            self.standard_params['jurisdiction_id'] = jurisdiction_id

        self.http = Http(http_cache, timeout=timeout)
        self.bounds = bounds
        if bounds is None:
            log.info(
                "Calculating geographic boundaries from the extent in settings.METRO_LIST"
            )
            self.bounds = get_default_bounds()
            try:
                # Make sure it's a geos geometry, not an ogr/gdal geometry,
                # so we can test for intersecting geos Points.
                self.bounds = self.bounds.geos
            except AttributeError:
                pass
        self.html_url_template = html_url_template
Example #7
0
    def __init__(
        self,
        api_url,
        api_key=None,
        jurisdiction_id=None,
        schema_slug="open311-service-requests",
        http_cache=None,
        seconds_between_requests=2.0,
        days_prior=90,
        timeout=60,
        bounds=None,
        html_url_template=None,
    ):
        """
        If ``bounds`` is passed, it should be a geometry; news items
        that don't intersect with that geometry will be skipped.
        Default bounds is the extent defined in settings.METRO_LIST.

        If ``html_url_template`` is given, the service_request id is
        replaced into the string to form the news item's url, e.g.
        http://somewhere/%s.html.  This is not really part of the
        GeoReport v2 API, but in some cases, like SeeClickFix, there is a
        well known location based on the identifier for an item.
        """
        self.api_url = api_url
        # Request URLs are built by appending to the base, so normalize
        # it to end with exactly one slash.
        if not self.api_url.endswith("/"):
            self.api_url += "/"

        self.days_prior = days_prior
        self.seconds_between_requests = seconds_between_requests
        self.schema_slug = schema_slug
        self.schema = Schema.objects.get(slug=self.schema_slug)
        self.service_request_id_field = SchemaField.objects.get(schema=self.schema, name="service_request_id")

        # Query parameters sent along with every API request.
        self.standard_params = {}
        if api_key is not None:
            self.standard_params["api_key"] = api_key
        if jurisdiction_id is not None:
            # BUG FIX: was assigned to the misspelled ``standard_parms``,
            # which raised AttributeError whenever a jurisdiction_id was
            # supplied (and never sent the parameter).
            self.standard_params["jurisdiction_id"] = jurisdiction_id

        self.http = Http(http_cache, timeout=timeout)
        self.bounds = bounds
        if bounds is None:
            log.info("Calculating geographic boundaries from the extent in settings.METRO_LIST")
            self.bounds = get_default_bounds()
            try:
                # Make sure it's a geos geometry, not an ogr/gdal geometry,
                # so we can test for intersecting geos Points.
                self.bounds = self.bounds.geos
            except AttributeError:
                pass
        self.html_url_template = html_url_template
def main(dry_run=True):
    items_outside = list(NewsItem.objects.exclude(location__intersects=get_default_bounds()))
    print "Items outside bounds: %s" % len(items_outside)
    for item in items_outside:
        fix_newsitem_coords(item, dry_run)
        print "-" * 60
    items_no_loc_name = list(NewsItem.objects.filter(location_name=''))
    print
    print "=" * 60
    print "Items with no location name: %s" % len(items_no_loc_name)
    for item in items_no_loc_name:
        fix_newsitem_loc_name(item, dry_run)
        print "-" * 60
def main(dry_run=True):
    items_outside = list(
        NewsItem.objects.exclude(location__intersects=get_default_bounds()))
    print "Items outside bounds: %s" % len(items_outside)
    for item in items_outside:
        fix_newsitem_coords(item, dry_run)
        print "-" * 60
    items_no_loc_name = list(NewsItem.objects.filter(location_name=''))
    print
    print "=" * 60
    print "Items with no location name: %s" % len(items_no_loc_name)
    for item in items_no_loc_name:
        fix_newsitem_loc_name(item, dry_run)
        print "-" * 60
Example #10
0
    def list_pages(self):
        """Yield Flickr API search results as raw JSON page strings.

        Stops when the page count reported by the API is exhausted, or
        raises StopScraping on an unparseable/failed response.
        """
        # XXX argh we apparently need the api_secret, and thus the token / frob dance?
        # even though this method doesn't need authentication???
        flickr = flickrapi.FlickrAPI(self.api_key, self.api_secret)
        bounds_csv = ','.join(str(coord)
                              for coord in get_default_bounds().extent)

        page_index = 0
        known_pages = float('inf')  # updated from the first parsed response
        while page_index < known_pages:
            page_index += 1
            page = flickr.photos_search(
                has_geo=1,
                bbox=bounds_csv,
                safe_search='1',
                min_taken_date=self.min_timestamp,
                max_taken_date=self.max_timestamp,
                per_page='400',
                page=str(page_index),
                extras='date_taken,date_upload,url_sq,description,geo,owner_name',
                format='json',
                content_type='1',  # photos only.
                nojsoncallback='1')

            # The total page count is only available inside the payload,
            # so parse it here even though parse_list() parses it again.
            adict = simplejson.loads(page)
            try:
                known_pages = int(adict['photos']['pages'])
            except KeyError:
                if adict.get('stat') == 'fail':
                    self.logger.error("Flickr error code %r: %s" %
                                      (adict['code'], adict['message']))
                else:
                    self.logger.error("Page content:\n%s" % page)
                raise StopScraping(
                    "Parsing error, missing 'photos' or 'pages', see above.")
            yield page
    def handle(self, county, **options):
        # First we download a bunch of zipfiles of TIGER data.
        if options['dir']:
            TMP = options['dir']
            download = not os.path.exists(TMP)
            if download:
                os.makedirs(TMP)
        else:
            TMP = tempfile.mkdtemp()
            download = True
        os.chdir(TMP)
        OUTDIR = os.path.join(TMP, 'tiger_data')
        STATE = '37' # NC
        if download:
            print 'Download TIGER data to %s' % TMP
            BASEURL= 'ftp://ftp2.census.gov/geo/tiger/TIGER2010'
            ZIPS = ("PLACE/2010/tl_2010_%s_place10.zip" % STATE,
                    "EDGES/tl_2010_%s_edges.zip" % county,
                    "FACES/tl_2010_%s_faces.zip" % county,
                    "FEATNAMES/tl_2010_%s_featnames.zip" % county,
                    )
            makedirs(OUTDIR) or die("couldn't create directory %s" % OUTDIR)
            for fname in ZIPS:
                wget('%s/%s' % (BASEURL, fname), cwd=OUTDIR) or die(
                    "Could not download %s/%s" % (BASEURL, fname))

            import glob
            for fname in glob.glob(os.path.join(OUTDIR, '*zip')):
                unzip(fname, cwd=OUTDIR) or die("Could not unzip %s" % fname)
            print "Shapefiles unzipped in %s" % OUTDIR

        # Now we load them into our blocks table.
        from ebpub.streets.blockimport.tiger import import_blocks
        from ebpub.utils.geodjango import get_default_bounds
        print "Importing blocks, this may take several minutes ..."

        # Passing --city means we skip features labeled for other cities.

        importer = import_blocks.TigerImporter(
            '%s/tl_2010_%s_edges.shp' % (OUTDIR, county),
            '%s/tl_2010_%s_featnames.dbf' % (OUTDIR, county),
            '%s/tl_2010_%s_faces.dbf' % (OUTDIR, county),
            '%s/tl_2010_%s_place10.shp' % (OUTDIR, STATE),
            encoding='utf8',
            filter_bounds=get_default_bounds())
        num_created = importer.save()
        print "Created %d blocks" % num_created

        #########################

        print "Populating streets and fixing addresses, these can take several minutes..."

        # Note these scripts should be run ONCE, in this order,
        # after you have imported *all* your blocks.

        from ebpub.streets.bin import populate_streets
        populate_streets.main(['streets'])
        populate_streets.main(['block_intersections'])
        populate_streets.main(['intersections'])
        print "Done."

        print "Removing temp directory %s" % TMP
        if not options['dir']:
            os.system('rm -rf %s' % TMP)
def main(argv=None):
    """Command-line entry point: import TIGER blocks from shapefiles.

    Expects exactly four positional arguments (edges.shp featnames.dbf
    faces.dbf place.shp); exits via parser.error() otherwise.  Options
    control city filtering, bounds filtering, and input encoding.
    """
    if argv is None:
        argv = sys.argv[1:]
    parser = optparse.OptionParser(usage='%prog edges.shp featnames.dbf faces.dbf place.shp')
    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False)
    parser.add_option('-c', '--city', dest='city', help='A city name to filter against')
    parser.add_option('-f', '--fix-cities', action="store_true", default=False,
                      help='Whether to override "city" attribute of blocks and '
                      'streets by finding an intersecting Location of a city-ish '
                      'type. Only makes sense if you have configured '
                      'multiple_cities=True in the METRO_LIST of your settings.py, '
                      'and after you have created some appropriate Locations.')

    parser.add_option('-b', '--filter-bounds', action="store", default=1,
                      type='int',
                      help='Whether to skip blocks outside the metro extent from your '
                      'settings.py. Default 1 (true); use 0 to disable.')
    parser.add_option('-l', '--filter-location', action="append",
                      help='A location (spelled as location-type-slug:location-slug) '
                      'that will be used to filter out blocks outside its boundaries. '
                      'May be passed more than once.'
                      )

    parser.add_option('-e', '--encoding', dest='encoding',
                      help='Encoding to use when reading the shapefile',
                      default='utf8')
    (options, args) = parser.parse_args(argv)
    if len(args) != 4:
        return parser.error('must provide 4 arguments, see usage')

    if options.filter_bounds:
        from ebpub.utils.geodjango import get_default_bounds
        filter_bounds = get_default_bounds()
    else:
        filter_bounds = None

    # Optionally filter on bounds of some Locations too: union all the
    # named locations together.
    loc_bounds = None
    for locslug in options.filter_location or []:
        typeslug, locslug = locslug.split(':', 1)
        from ebpub.db.models import Location
        location = Location.objects.get(location_type__slug=typeslug, slug=locslug)
        if loc_bounds is None:
            loc_bounds = location.location
        else:
            loc_bounds = loc_bounds.union(location.location)

    # Combine metro bounds with location bounds.  (The old version ended
    # with a redundant `filter_bounds = filter_bounds` branch.)
    if loc_bounds is not None:
        if filter_bounds is None:
            filter_bounds = loc_bounds
        else:
            filter_bounds = filter_bounds.intersection(loc_bounds)

    tiger = TigerImporter(*args, verbose=options.verbose,
                          filter_city=options.city,
                          filter_bounds=filter_bounds,
                          encoding=options.encoding,
                          fix_cities=options.fix_cities)
    if options.verbose:
        import logging
        logger.setLevel(logging.DEBUG)
    num_created = tiger.save()
    logger.info("Created %d new blocks" % num_created)
    logger.debug("... from %d feature names" % len(tiger.featnames_db))
    logger.debug("feature tlids with blocks: %d" % len(tiger.tlids_with_blocks))

    # Debug report: feature names that produced no blocks.
    import pprint
    tlids_wo_blocks = set(tiger.featnames_db.keys()).difference(tiger.tlids_with_blocks)
    logger.debug("feature tlids WITHOUT blocks: %d" % len(tlids_wo_blocks))
    all_rows = []
    for t in tlids_wo_blocks:
        all_rows.extend(tiger.featnames_db[t])
    logger.debug("Rows: %d" % len(all_rows))
    names = [(r['FULLNAME'], r['TLID']) for r in all_rows]
    names.sort()
    logger.debug("=================")
    for n, t in names:
        logger.debug("%s %s" % (n, t))
    for tlid in sorted(tlids_wo_blocks)[:10]:
        feat = tiger.featnames_db[tlid]
        logger.debug(pprint.pformat(feat))
Example #13
0
def main(argv=None):
    """Command-line entry point: import TIGER blocks from shapefiles.

    Expects exactly four positional arguments (edges.shp featnames.dbf
    faces.dbf place.shp); exits via parser.error() otherwise.  Options
    control city filtering, bounds filtering, and input encoding.
    """
    if argv is None:
        argv = sys.argv[1:]
    parser = optparse.OptionParser(
        usage='%prog edges.shp featnames.dbf faces.dbf place.shp')
    parser.add_option('-v',
                      '--verbose',
                      action='store_true',
                      dest='verbose',
                      default=False)
    parser.add_option('-c',
                      '--city',
                      dest='city',
                      help='A city name to filter against')
    parser.add_option(
        '-f',
        '--fix-cities',
        action="store_true",
        default=False,
        help='Whether to override "city" attribute of blocks and '
        'streets by finding an intersecting Location of a city-ish '
        'type. Only makes sense if you have configured '
        'multiple_cities=True in the METRO_LIST of your settings.py, '
        'and after you have created some appropriate Locations.')

    parser.add_option(
        '-b',
        '--filter-bounds',
        action="store",
        default=1,
        type='int',
        help='Whether to skip blocks outside the metro extent from your '
        'settings.py. Default 1 (true); use 0 to disable.')
    parser.add_option(
        '-l',
        '--filter-location',
        action="append",
        help='A location (spelled as location-type-slug:location-slug) '
        'that will be used to filter out blocks outside its boundaries. '
        'May be passed more than once.')

    parser.add_option('-e',
                      '--encoding',
                      dest='encoding',
                      help='Encoding to use when reading the shapefile',
                      default='utf8')
    (options, args) = parser.parse_args(argv)
    if len(args) != 4:
        return parser.error('must provide 4 arguments, see usage')

    if options.filter_bounds:
        from ebpub.utils.geodjango import get_default_bounds
        filter_bounds = get_default_bounds()
    else:
        filter_bounds = None

    # Optionally filter on bounds of some Locations too: union all the
    # named locations together.
    loc_bounds = None
    for locslug in options.filter_location or []:
        typeslug, locslug = locslug.split(':', 1)
        from ebpub.db.models import Location
        location = Location.objects.get(location_type__slug=typeslug,
                                        slug=locslug)
        if loc_bounds is None:
            loc_bounds = location.location
        else:
            loc_bounds = loc_bounds.union(location.location)

    # Combine metro bounds with location bounds.  (The old version ended
    # with a redundant `filter_bounds = filter_bounds` branch.)
    if loc_bounds is not None:
        if filter_bounds is None:
            filter_bounds = loc_bounds
        else:
            filter_bounds = filter_bounds.intersection(loc_bounds)

    tiger = TigerImporter(*args,
                          verbose=options.verbose,
                          filter_city=options.city,
                          filter_bounds=filter_bounds,
                          encoding=options.encoding,
                          fix_cities=options.fix_cities)
    if options.verbose:
        import logging
        logger.setLevel(logging.DEBUG)
    num_created = tiger.save()
    logger.info("Created %d new blocks" % num_created)
    logger.debug("... from %d feature names" % len(tiger.featnames_db))
    logger.debug("feature tlids with blocks: %d" %
                 len(tiger.tlids_with_blocks))

    # Debug report: feature names that produced no blocks.
    import pprint
    tlids_wo_blocks = set(tiger.featnames_db.keys()).difference(
        tiger.tlids_with_blocks)
    logger.debug("feature tlids WITHOUT blocks: %d" % len(tlids_wo_blocks))
    all_rows = []
    for t in tlids_wo_blocks:
        all_rows.extend(tiger.featnames_db[t])
    logger.debug("Rows: %d" % len(all_rows))
    names = [(r['FULLNAME'], r['TLID']) for r in all_rows]
    names.sort()
    logger.debug("=================")
    for n, t in names:
        logger.debug("%s %s" % (n, t))
    for tlid in sorted(tlids_wo_blocks)[:10]:
        feat = tiger.featnames_db[tlid]
        logger.debug(pprint.pformat(feat))