def main(argv=None):
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    import ebdata.retrieval.log  # Sets up standard handlers.
    import sys
    if argv is None:
        argv = sys.argv[1:]
    from optparse import OptionParser
    usage = "usage: %prog [options] city state"
    parser = OptionParser(usage=usage)
    add_verbosity_options(parser)
    options, args = parser.parse_args(argv)
    if len(args) != 2:
        parser.print_usage()
        sys.exit(0)
    city, state = args
    scraper = SeeClickFixNewsFeedScraper(city=city.title(), state=state.upper())
    setup_logging_from_opts(options, scraper.logger)
    TESTING = False
    if TESTING:
        from ebdata.retrieval import log_debug  # imported for its side effects
        scraper.display_data()
    else:
        scraper.update()
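# A minimal sketch of how this entry point is typically wired up when the
# module is run as a script. The __main__ guard is not part of the original,
# and "Columbia MO" is a hypothetical city/state pair:
#
# if __name__ == '__main__':
#     main()   # e.g. run as: python <script>.py Columbia MO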
def main(argv=None):
    import sys
    if argv is None:
        argv = sys.argv[1:]
    from optparse import OptionParser
    parser = OptionParser()
    # parser.add_option(
    #     "--start-date", help="Start date for photo search. Default is 30 days ago.",
    #     action='store', default=None,
    # )
    # parser.add_option(
    #     "--end-date", help="Stop date for photo search. Default is now",
    #     action='store', default=None,
    # )
    parser.add_option(
        "--schema", help="Slug of schema to use. Default is 'photos'.",
        action='store', default='photos',
    )
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    add_verbosity_options(parser)
    options, args = parser.parse_args(argv)
    setup_logging_from_opts(options, logger)
    scraper = FlickrScraper(options)
    scraper.update()
def main(argv=None, default_url=None):
    import sys
    if argv is None:
        argv = sys.argv[1:]
    from optparse import OptionParser
    usage = "usage: %prog [options] <feed url>"
    parser = OptionParser(usage=usage)
    parser.add_option(
        "--schema", help="Slug of the news item type to create when scraping",
        default="local-news"
    )
    # parser.add_option(
    #     "--http-cache",
    #     help='location to use as an http cache. If a cached value is seen, no update is performed.',
    #     action='store'
    # )
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    add_verbosity_options(parser)
    options, args = parser.parse_args(argv)
    if len(args) >= 1:
        url = args[0]
    elif default_url:
        url = default_url
    else:
        parser.print_usage()
        sys.exit(0)
    scraper = RssScraper(url=url, schema_slug=options.schema)
    setup_logging_from_opts(options, scraper.logger)
    scraper.update()
def main(argv=None): import sys if argv is None: argv = sys.argv[1:] from optparse import OptionParser usage = "usage: %prog [options] <spreadsheet>" usage += "\n\nSpreadsheet argument can be local files or URLs." parser = OptionParser(usage=usage) parser.add_option( "--schema", help="slug of news item type to create when scraping", default="arrests" ) from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts add_verbosity_options(parser) options, args = parser.parse_args(argv) try: items_sheet = args[0] except IndexError: parser.print_usage() sys.exit(0) scraper = ArrestScraper(items_sheet, schema_slug=options.schema) setup_logging_from_opts(options, scraper.logger) scraper.update()
def main(argv=None):
    import sys
    if argv is None:
        argv = sys.argv[1:]
    from optparse import OptionParser
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    optparser = OptionParser(usage='''usage: %prog [options] [schema]

Updates aggregate statistics for the given schema (default: all schemas).
''')
    optparser.add_option('-r', '--reset', action='store_true',
                         help='Delete all aggregates before updating.')
    add_verbosity_options(optparser)
    optparser.add_option('-d', '--dry-run', action='store_true',
                         help='Dry run, change nothing.')
    opts, args = optparser.parse_args(argv)
    setup_logging_from_opts(opts, logger)
    if args:
        return update_aggregates(*args, reset=opts.reset, dry_run=opts.dry_run)
    else:
        return update_all_aggregates(reset=opts.reset, dry_run=opts.dry_run)
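# Hedged usage sketch: because main() accepts an argv list, it can be driven
# programmatically as well as from the shell. The schema slug 'crime' below
# is a hypothetical example:
#
# main(['--reset', 'crime'])   # rebuild aggregates for one schema from scratch
# main(['--dry-run'])          # walk all schemas without changing anything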
def main(argv=None): import sys if argv is None: argv = sys.argv[1:] from optparse import OptionParser usage = "usage: %prog [options] <feed url>" parser = OptionParser(usage=usage) parser.add_option("--schema", help="which news item type to create when scraping", default="local-news") parser.add_option( "--http-cache", help= 'location to use as an http cache. If a cached value is seen, no update is performed.', action='store') from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts add_verbosity_options(parser) options, args = parser.parse_args(argv) setup_logging_from_opts(options, logger) if len(args) < 1: parser.print_usage() sys.exit(0) scraper = LocalNewsScraper(url=args[0], schema_slug=options.schema, http_cache=options.http_cache) scraper.update()
def main(argv=None):
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    import sys
    if argv is None:
        argv = sys.argv[1:]
    from optparse import OptionParser
    usage = "usage: %prog [options] city state"
    parser = OptionParser(usage=usage)
    add_verbosity_options(parser)
    options, args = parser.parse_args(argv)
    if len(args) != 2:
        parser.print_usage()
        sys.exit(0)
    city, state = args
    scraper = SeeClickFixNewsFeedScraper(city=city.title(), state=state.upper())
    setup_logging_from_opts(options, scraper.logger)
    TESTING = False
    if TESTING:
        from ebdata.retrieval import log_debug  # imported for its side effects
        scraper.display_data()
    else:
        scraper.update()
def main(argv=None): import sys if argv is None: argv = sys.argv[1:] from optparse import OptionParser usage = "usage: %prog [options] <feed url>" parser = OptionParser(usage=usage) parser.add_option( "--schema", help="which news item type to create when scraping", default="local-news" ) parser.add_option( "--http-cache", help='location to use as an http cache. If a cached value is seen, no update is performed.', action='store' ) from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts add_verbosity_options(parser) options, args = parser.parse_args(argv) setup_logging_from_opts(options, logger) if len(args) < 1: parser.print_usage() sys.exit(0) scraper = LocalNewsScraper(url=args[0], schema_slug=options.schema, http_cache=options.http_cache) scraper.update()
def main(argv=None): import sys if argv is None: argv = sys.argv[1:] from optparse import OptionParser parser = OptionParser() parser.add_option( '-d', "--days", help="How many days (prior to stop date) to search. Default is 30 days.", action='store', default=30, type='int', ) parser.add_option( '-e', "--end-date", help="Stop date for photo search, format YYYY/MM/DD. Default is now.", action='store', default=None, ) parser.add_option( "--schema", help="Slug of schema to use. Default is 'photos'.", action='store', default='photos', ) from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts add_verbosity_options(parser) options, args = parser.parse_args(argv) setup_logging_from_opts(options, logger) scraper = FlickrScraper(options) scraper.update()
def main(argv=None):
    import sys
    if argv is None:
        argv = sys.argv[1:]
    from optparse import OptionParser
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    # The original referenced an undefined `parser`; build one here, following
    # the pattern of the sibling scrapers.
    parser = OptionParser()
    add_verbosity_options(parser)
    options, args = parser.parse_args(argv)
    scraper = MeetupScraper(options)
    setup_logging_from_opts(options, scraper.logger)
    scraper.update()
def main(argv=None):
    import sys
    if argv is None:
        argv = sys.argv[1:]
    from optparse import OptionParser
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    optparser = OptionParser()
    add_verbosity_options(optparser)
    opts, args = optparser.parse_args(argv)
    setup_logging_from_opts(opts, logger)
    EventsCalendarScraper().update()
def main(argv=None):
    import sys
    if argv is None:
        argv = sys.argv[1:]
    from optparse import OptionParser
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    optparser = OptionParser()
    add_verbosity_options(optparser)
    opts, args = optparser.parse_args(argv)
    setup_logging_from_opts(opts, logger)
    ObituaryScraper().update()
def main(argv=None):
    import sys
    if argv is None:
        argv = sys.argv[1:]
    from optparse import OptionParser
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    optparser = OptionParser()
    add_verbosity_options(optparser)
    opts, args = optparser.parse_args(argv)
    setup_logging_from_opts(opts, logger)
    for item in settings.TWITTER_DEALS_KEYWORDS:
        TwitterScraper().update(item)
def main(): parser = OptionParser() parser.add_option('-c', '--clear', help='Clear schema', action="store_true", dest="clear") add_verbosity_options(parser) opts, args = parser.parse_args(sys.argv) scraper = Scraper(clear=opts.clear) setup_logging_from_opts(opts, scraper.logger) scraper.update()
def main(argv=None):
    import sys
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    from optparse import OptionParser
    if argv is None:
        argv = sys.argv[1:]
    optparser = OptionParser()
    add_verbosity_options(optparser)
    opts, args = optparser.parse_args(argv)
    setup_logging_from_opts(opts, logger)
    update()
def main(argv=None):
    import sys
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    from optparse import OptionParser
    if argv is None:
        argv = sys.argv[1:]
    optparser = OptionParser()
    add_verbosity_options(optparser)
    opts, args = optparser.parse_args(argv)
    setup_logging_from_opts(opts, logger)
    update(args)
def main(): parser = OptionParser() parser.add_option('-c', '--clear', help='Clear schema', action="store_true", dest="clear") add_verbosity_options(parser) opts, args = parser.parse_args(sys.argv) setup_logging_from_opts(opts, logger) if len(args) != 2: parser.error("Please specify a CSV file to import") filename = args[1] RestaurantInspections(clear=opts.clear).update(filename)
def main(argv=None):
    import sys
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    from optparse import OptionParser
    if argv is None:
        argv = sys.argv[1:]
    optparser = OptionParser()
    add_verbosity_options(optparser)
    scraper = TruliaRealEstateScraper()
    opts, args = optparser.parse_args(argv)
    setup_logging_from_opts(opts, scraper.logger)
    scraper.update()
def main(argv=None):
    import sys
    if argv is None:
        argv = sys.argv[1:]
    from optparse import OptionParser
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    optparser = OptionParser()
    add_verbosity_options(optparser)
    opts, args = optparser.parse_args(argv)
    setup_logging_from_opts(opts, logger)
    for item in settings.YOUTUBE_KEYWORDS:
        YouTubeScraper().update(item, '1')
        YouTubeScraper().update(item, '51')
        YouTubeScraper().update(item, '101')
        YouTubeScraper().update(item, '151')
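# The four hard-coded start indices above step through results 50 at a time
# (1, 51, 101, 151). Assuming the second argument to update() is a 1-based
# start index, an equivalent loop inside the keyword iteration would be:
#
# for start in range(1, 200, 50):
#     YouTubeScraper().update(item, str(start))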
def main(argv=None): if argv is None: argv = sys.argv[1:] from optparse import OptionParser usage = "usage: %prog [options] <api url>" parser = OptionParser(usage=usage) parser.add_option( "-k", "--api-key", help='GeoReport V2 API key', action='store', ) parser.add_option( "--html-url-template", help= 'template for creating html urls for items based on their identifiers, eg http://example.com/{id}.html', action='store') parser.add_option("--days-prior", help='how many days ago to start scraping', type="int", default=90) parser.add_option("--schema", help="slug of news item type to use", default="open311-service-requests") parser.add_option( "--http-cache", help= 'location to use as an http cache. If a cached value is seen, no update is performed.', action='store') parser.add_option("--jurisdiction-id", help='jurisdiction identifier to provide to api', action='store') from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts add_verbosity_options(parser) options, args = parser.parse_args(argv) setup_logging_from_opts(options, log) if len(args) < 1: parser.print_usage() return 1 scraper = GeoReportV2Scraper(api_url=args[0], api_key=options.api_key, jurisdiction_id=options.jurisdiction_id, schema_slug=options.schema, days_prior=options.days_prior, http_cache=options.http_cache, html_url_template=options.html_url_template) scraper.update() return 0
def main(argv=None): if argv is None: argv = sys.argv[1:] from optparse import OptionParser usage = "usage: %prog [options] <api url>" parser = OptionParser(usage=usage) parser.add_option( "-k", "--api-key", help='GeoReport V2 API key', action='store', ) parser.add_option( "--html-url-template", help='template for creating html urls for items based on their identifiers, eg http://example.com/{id}.html', action='store' ) parser.add_option( "--days-prior", help='how many days ago to start scraping', type="int", default=90 ) parser.add_option( "--schema", help="which news item type to create when scraping", default="open311-service-requests" ) parser.add_option( "--http-cache", help='location to use as an http cache. If a cached value is seen, no update is performed.', action='store' ) parser.add_option( "--jurisdiction-id", help='jurisdiction identifier to provide to api', action='store' ) from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts add_verbosity_options(parser) options, args = parser.parse_args(argv) setup_logging_from_opts(options, log) if len(args) < 1: parser.print_usage() return 1 scraper = GeoReportV2Scraper(api_url=args[0], api_key=options.api_key, jurisdiction_id=options.jurisdiction_id, schema_slug=options.schema, days_prior=options.days_prior, http_cache=options.http_cache, html_url_template=options.html_url_template) scraper.update() return 0
def main(argv=None): argv = argv or sys.argv[1:] from optparse import OptionParser from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts optparser = OptionParser() add_verbosity_options(optparser) opts, args = optparser.parse_args(argv) # This sets up the root logger & handlers as per other scrapers. import ebdata.retrieval.log setup_logging_from_opts(opts, logger=logger) if args: url = args[0] else: url = 'http://www.gocolumbiamo.com/PSJC/Services/911/911dispatch/police_georss.php' return update(url)
def main(argv=None): argv = argv or sys.argv[1:] from optparse import OptionParser from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts optparser = OptionParser() add_verbosity_options(optparser) opts, args = optparser.parse_args(argv) # This sets up the root logger & handlers as per other scrapers. import ebdata.retrieval.log setup_logging_from_opts(opts, logger=logger) if args: url = args[0] else: url = 'http://report.boonecountymo.org/mrcjava/mrcclasses/SH01_MP/cadlogs.xml' return update(url)
def main(argv=None):
    import sys
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    from optparse import OptionParser
    if argv is None:
        argv = sys.argv[1:]
    optparser = OptionParser()
    optparser.add_option(
        '-d', '--days',
        help="How many days ago to start searching. Default is 10. -1 means load everything.",
        action="store", default=10, type="int",
    )
    add_verbosity_options(optparser)
    opts, args = optparser.parse_args(argv)
    scraper = MidMoReviewsScraper(options=opts)
    setup_logging_from_opts(opts, scraper.logger)
    scraper.update()
def main(argv=None): import sys if argv is None: argv = sys.argv[1:] from optparse import OptionParser usage = "usage: %prog [options] <spreadsheet> [<mapping spreadsheet>]" usage += "\n\nSpreadsheet arguments can be local files or URLs." usage += "\n\nSee http://openblockproject.org/docs/packages/ebdata.html#spreadsheets-scrapers-general-spreadsheet for more." parser = OptionParser(usage=usage) parser.add_option("--schema", help="slug of news item type to create when scraping", default="local-news") parser.add_option( "--unique-fields", help= "Which NewsItem fields identify a unique record in this data source. Comma-separated, eg. --unique-fields='url,location_name,title", action="store", default=None) from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts add_verbosity_options(parser) options, args = parser.parse_args(argv) if len(args) >= 1: items_sheet = args[0] if len(args) >= 2: map_sheet = args[1] else: map_sheet = None else: parser.print_usage() sys.exit(0) if options.unique_fields: unique_fields = [s.strip() for s in options.unique_fields.split(',')] else: unique_fields = [] scraper = SpreadsheetScraper(items_sheet, map_sheet, schema_slug=options.schema, unique_fields=unique_fields) setup_logging_from_opts(options, scraper.logger) scraper.update()
def main(argv=None): import sys argv = argv or sys.argv[1:] from optparse import OptionParser usage = "usage: %prog [options]" parser = OptionParser(usage=usage) parser.add_option( "--days-prior", help='how many days ago to start scraping', type="int", default=30 ) add_verbosity_options(parser) options, args = parser.parse_args(argv) # This sets up the root logger & handlers as per other scrapers. import ebdata.retrieval.log setup_logging_from_opts(options, logger=log) scraper = EverythingMidMoBusinessScraper(days_prior=options.days_prior) scraper.update()
def main(argv=None):
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    import sys
    if argv is None:
        argv = sys.argv[1:]
    from optparse import OptionParser
    parser = OptionParser()
    add_verbosity_options(parser)
    parser.add_option(
        '-n', '--name-start',
        help="Name of the first restaurant to start with. This is useful if "
             "you've run the scraper and it broke several hours in; you can "
             "pick up around where it left off.")
    options, args = parser.parse_args(argv)
    scraper = RestaurantScraper(name_start=options.name_start)
    setup_logging_from_opts(options, scraper.logger)
    scraper.update()
def main(argv=None):
    import sys
    argv = argv or sys.argv[1:]
    from optparse import OptionParser
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    optparser = OptionParser()
    optparser.add_option(
        '-d', '--days',
        help="How many days ago to start searching. Default is 30. -1 means load everything.",
        action="store", default=30, type="int",
    )
    add_verbosity_options(optparser)
    opts, args = optparser.parse_args(argv)
    # This sets up the root logger & handlers as per other scrapers.
    import ebdata.retrieval.log
    setup_logging_from_opts(opts, logger=logger)
    xmlpath = fetch_xml()
    xmlfile = open(xmlpath, 'r')
    return update(xmlfile, opts)
def main(argv=None): import sys if argv is None: argv = sys.argv[1:] from optparse import OptionParser usage = "usage: %prog [options] <spreadsheet> [<mapping spreadsheet>]" usage += "\n\nSpreadsheet arguments can be local files or URLs." usage += "\n\nSee http://openblockproject.org/docs/packages/ebdata.html#spreadsheets-scrapers-general-spreadsheet for more." parser = OptionParser(usage=usage) parser.add_option( "--schema", help="slug of news item type to create when scraping", default="local-news" ) parser.add_option( "--unique-fields", help="Which NewsItem fields identify a unique record in this data source. Comma-separated, eg. --unique-fields='url,location_name,title", action="store", default=None ) from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts add_verbosity_options(parser) options, args = parser.parse_args(argv) if len(args) >= 1: items_sheet = args[0] if len(args) >= 2: map_sheet = args[1] else: map_sheet = None else: parser.print_usage() sys.exit(0) if options.unique_fields: unique_fields = [s.strip() for s in options.unique_fields.split(',')] else: unique_fields = [] scraper = SpreadsheetScraper(items_sheet, map_sheet, schema_slug=options.schema, unique_fields=unique_fields) setup_logging_from_opts(options, scraper.logger) scraper.update()
def main(argv=None): if argv is None: argv = sys.argv[1:] from optparse import OptionParser usage = "usage: %prog [options]" parser = OptionParser(usage=usage) parser.add_option( "--days-prior", help='how many days ago to start scraping', type="int", default=90 ) from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts add_verbosity_options(parser) options, args = parser.parse_args(argv) setup_logging_from_opts(options, logger) scraper = ColumbiaRestaurantInspScraper() scraper.update() return 0
def main(argv=None):
    import sys
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    from optparse import OptionParser
    if argv is None:
        argv = sys.argv[1:]
    optparser = OptionParser()
    optparser.add_option(
        '-s', '--start-date',
        help='Date to start scraping, in YYYY/MM/DD format. If not passed, default is 7 days ago.'
    )
    add_verbosity_options(optparser)
    opts, args = optparser.parse_args(argv)
    if opts.start_date:
        from ebpub.utils.dates import parse_date
        start_date = parse_date(opts.start_date, '%Y/%m/%d')
    else:
        start_date = None
    scraper = Scraper(start_date=start_date)
    setup_logging_from_opts(opts, scraper.logger)
    scraper.update()
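# For reference, a hedged sketch of the date handling above: parse_date is
# expected to turn the --start-date string into a date object (the input
# value below is a hypothetical example):
#
# from ebpub.utils.dates import parse_date
# start_date = parse_date('2011/07/01', '%Y/%m/%d')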
def main(argv=None): import sys if argv is None: argv = sys.argv[1:] from optparse import OptionParser usage = "usage: %prog [options] <spreadsheet> [<config spreadsheet>]" parser = OptionParser(usage=usage) parser.add_option( "--schema", help="which news item type to create when scraping", default="local-news" ) parser.add_option( "--unique-fields", help="Which NewsItem fields identify a unique record in this data source. Comma-separated, eg. --unique-fields='url,location_name,title", action="store", default=None ) from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts add_verbosity_options(parser) options, args = parser.parse_args(argv) if len(args) >= 1: items_sheet = args[0] if len(args) >= 2: map_sheet = args[1] else: map_sheet = None else: parser.print_usage() sys.exit(0) if options.unique_fields: unique_fields = [s.strip() for s in options.unique_fields.split(',')] else: unique_fields = [] scraper = SpreadsheetScraper(items_sheet, map_sheet, schema_slug=options.schema, unique_fields=unique_fields) setup_logging_from_opts(options, scraper.logger) scraper.update()
def main(): parser = OptionParser() parser.add_option('-c', '--clear', help='Clear schema', action="store_true", dest="clear") parser.add_option('-s', '--stats', help='Report file stats only', action="store_true", dest="stats") add_verbosity_options(parser) opts, args = parser.parse_args(sys.argv) setup_logging_from_opts(opts, logger) if len(args) != 3: parser.error("Please specify a CSV file and shapefile to import") csv_name, shp_name = args[1], args[2] csvreader = csv.DictReader(open(csv_name)) layer = DataSource(shp_name)[0] prop_trans = PropertyTransactions(clear=opts.clear) if opts.stats: prop_trans.stats(csvreader, layer) else: prop_trans.update(csvreader, layer)
def main(argv=None): import sys if argv is None: argv = sys.argv[1:] from optparse import OptionParser parser = OptionParser() parser.add_option( '-d', "--days", help= "How many days (prior to stop date) to search. Default is 30 days.", action='store', default=30, type='int', ) parser.add_option( '-e', "--end-date", help="Stop date for photo search, format YYYY/MM/DD. Default is now.", action='store', default=None, ) parser.add_option( "--schema", help="Slug of schema to use. Default is 'photos'.", action='store', default='photos', ) from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts add_verbosity_options(parser) options, args = parser.parse_args(argv) setup_logging_from_opts(options, logger) scraper = FlickrScraper(options) scraper.update()