Example #1
def main(argv=None):
    import sys
    if argv is None:
        argv = sys.argv[1:]
    from optparse import OptionParser
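    # Note: names used below without local imports (logger, add_verbosity_options,
    # setup_logging_from_opts, update_aggregates, update_all_aggregates) are
    # module-level names in the original source file; that convention holds for
    # most of the excerpts in this listing.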
    optparser = OptionParser(usage='''usage: %prog [options] [schema]

Updates aggregate statistics for the given schema (default: all schemas).
''')
    optparser.add_option('-r',
                         '--reset',
                         action='store_true',
                         help='Delete all aggregates before updating.')

    add_verbosity_options(optparser)

    optparser.add_option('-d',
                         '--dry-run',
                         action='store_true',
                         help='Dry run, change nothing.')

    opts, args = optparser.parse_args(argv)

    setup_logging_from_opts(opts, logger)

    if args:
        return update_aggregates(*args, reset=opts.reset, dry_run=opts.dry_run)
    else:
        return update_all_aggregates(reset=opts.reset, dry_run=opts.dry_run)
Example #2
def main(argv=None):
    import sys
    if argv is None:
        argv = sys.argv[1:]
    from optparse import OptionParser
    usage = "usage: %prog [options] <spreadsheet>"
    usage += "\n\nSpreadsheet argument can be local files or URLs."
    parser = OptionParser(usage=usage)

    parser.add_option(
        "--schema", help="slug of news item type to create when scraping",
        default="arrests"
    )

    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    add_verbosity_options(parser)

    options, args = parser.parse_args(argv)
    try:
        items_sheet = args[0]
    except IndexError:
        parser.print_usage()
        sys.exit(0)

    scraper = ArrestScraper(items_sheet, schema_slug=options.schema)
    setup_logging_from_opts(options, scraper.logger)
    scraper.update()
Example #3
def main(argv=None):
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    import ebdata.retrieval.log # Sets up standard handlers.
    import sys
    if argv is None:
        argv = sys.argv[1:]

    from optparse import OptionParser
    usage = "usage: %prog [options] city state"
    parser = OptionParser(usage=usage)
    add_verbosity_options(parser)

    options, args = parser.parse_args(argv)
    if len(args) != 2:
        parser.print_usage()
        sys.exit(0)
    city, state = args
    scraper = SeeClickFixNewsFeedScraper(city=city.title(), state=state.upper())
    setup_logging_from_opts(options, scraper.logger)

    TESTING = False
    if TESTING:
        from ebdata.retrieval import log_debug
        scraper.display_data()
    else:
        scraper.update()
Example #4
def main(argv=None):
    import sys

    if argv is None:
        argv = sys.argv[1:]
    from optparse import OptionParser

    optparser = OptionParser(
        usage="""usage: %prog [options] [schema]

Updates aggregate statistics for the given schema (default: all schemas).
"""
    )
    optparser.add_option("-r", "--reset", action="store_true", help="Delete all aggregates before updating.")

    add_verbosity_options(optparser)

    optparser.add_option("-d", "--dry-run", action="store_true", help="Dry run, change nothing.")

    opts, args = optparser.parse_args(argv)

    setup_logging_from_opts(opts, logger)

    if args:
        return update_aggregates(*args, reset=opts.reset, dry_run=opts.dry_run)
    else:
        return update_all_aggregates(reset=opts.reset, dry_run=opts.dry_run)
Example #5
def main(argv=None):
    import sys
    if argv is None:
        argv = sys.argv[1:]

    from optparse import OptionParser
    usage = "usage: %prog [options] <feed url>"
    parser = OptionParser(usage=usage)

    parser.add_option("--schema",
                      help="which news item type to create when scraping",
                      default="local-news")
    parser.add_option(
        "--http-cache",
        help='location to use as an http cache.  If a cached value is seen, no update is performed.',
        action='store')

    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    add_verbosity_options(parser)

    options, args = parser.parse_args(argv)
    setup_logging_from_opts(options, logger)

    if len(args) < 1:
        parser.print_usage()
        sys.exit(0)

    scraper = LocalNewsScraper(url=args[0],
                               schema_slug=options.schema,
                               http_cache=options.http_cache)

    scraper.update()
Example #6
def main(argv=None, default_url=None):
    import sys
    if argv is None:
        argv = sys.argv[1:]

    from optparse import OptionParser
    usage = "usage: %prog [options] <feed url>"
    parser = OptionParser(usage=usage)

    parser.add_option(
        "--schema", help="Slug of the news item type to create when scraping",
        default="local-news"
        )
    # parser.add_option(
    #     "--http-cache", help='location to use as an http cache.  If a cached value is seen, no update is performed.', 
    #     action='store'
    #     )

    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    add_verbosity_options(parser)

    options, args = parser.parse_args(argv)
    if len(args) >= 1:
        url = args[0]
    else:
        if default_url:
            url = default_url
        else:
            parser.print_usage()
            sys.exit(0)

    scraper = RssScraper(url=url, schema_slug=options.schema)
    setup_logging_from_opts(options, scraper.logger)
    scraper.update()
Example #7
def main():
    parser = OptionParser()
    parser.add_option('-c', '--clear', help='Clear schema',
                      action="store_true", dest="clear")
    add_verbosity_options(parser)
    opts, args = parser.parse_args(sys.argv)
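    # Note: unlike most examples here, setup_logging_from_opts is never called,
    # so the verbosity options are parsed but have no effect.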
    AddressesScraper(clear=opts.clear).run()
Example #8
def main(argv=None, default_url=None):
    import sys
    if argv is None:
        argv = sys.argv[1:]

    from optparse import OptionParser
    usage = "usage: %prog [options] <feed url>"
    parser = OptionParser(usage=usage)

    parser.add_option(
        "--schema",
        help="Slug of the news item type to create when scraping",
        default="local-news")
    # parser.add_option(
    #     "--http-cache", help='location to use as an http cache.  If a cached value is seen, no update is performed.',
    #     action='store'
    #     )

    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    add_verbosity_options(parser)

    options, args = parser.parse_args(argv)
    if len(args) >= 1:
        url = args[0]
    else:
        if default_url:
            url = default_url
        else:
            parser.print_usage()
            sys.exit(0)

    scraper = RssScraper(url=url, schema_slug=options.schema)
    setup_logging_from_opts(options, scraper.logger)
    scraper.update()
Example #9
def main(argv=None):
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    import sys
    if argv is None:
        argv = sys.argv[1:]

    from optparse import OptionParser
    usage = "usage: %prog [options] city state"
    parser = OptionParser(usage=usage)
    add_verbosity_options(parser)

    options, args = parser.parse_args(argv)
    if len(args) != 2:
        parser.print_usage()
        sys.exit(0)
    city, state = args
    scraper = SeeClickFixNewsFeedScraper(city=city.title(), state=state.upper())
    setup_logging_from_opts(options, scraper.logger)

    TESTING = False
    if TESTING:
        from ebdata.retrieval import log_debug
        scraper.display_data()
    else:
        scraper.update()
Example #10
def main(argv=None):
    import sys
    if argv is None:
        argv = sys.argv[1:]

    from optparse import OptionParser
    usage = "usage: %prog [options] <feed url>"
    parser = OptionParser(usage=usage)

    parser.add_option(
        "--schema", help="which news item type to create when scraping",
        default="local-news"
        )
    parser.add_option(
        "--http-cache", help='location to use as an http cache.  If a cached value is seen, no update is performed.', 
        action='store'
        )

    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    add_verbosity_options(parser)

    options, args = parser.parse_args(argv)
    setup_logging_from_opts(options, logger)

    if len(args) < 1:
        parser.print_usage()
        sys.exit(0)

    scraper = LocalNewsScraper(url=args[0], schema_slug=options.schema,
                               http_cache=options.http_cache)

    scraper.update()
Example #11
def main(argv=None):
    import sys
    if argv is None:
        argv = sys.argv[1:]
    from optparse import OptionParser
    parser = OptionParser()
    parser.add_option(
        '-d', "--days",
        help="How many days (prior to stop date) to search. Default is 30 days.",
        action='store', default=30, type='int',
        )
    parser.add_option(
        '-e', "--end-date",
        help="Stop date for photo search, format YYYY/MM/DD. Default is now.",
        action='store', default=None,
        )
    parser.add_option(
        "--schema", help="Slug of schema to use. Default is 'photos'.",
        action='store', default='photos',
        )

    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    add_verbosity_options(parser)

    options, args = parser.parse_args(argv)
    setup_logging_from_opts(options, logger)
    scraper = FlickrScraper(options)
    scraper.update()
Example #12
def main(argv=None):
    import sys
    if argv is None:
        argv = sys.argv[1:]
    from optparse import OptionParser
    parser = OptionParser()
    # parser.add_option(
    #     "--start-date", help="Start date for photo search. Default is 30 days ago.",
    #     action='store', default=None,
    #     )
    # parser.add_option(
    #     "--end-date", help="Stop date for photo search. Default is now",
    #     action='store', default=None,
    #     )
    parser.add_option(
        "--schema", help="Slug of schema to use. Default is 'photos'.",
        action='store', default='photos',
        )

    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    add_verbosity_options(parser)

    options, args = parser.parse_args(argv)
    setup_logging_from_opts(options, logger)
    scraper = FlickrScraper(options)
    scraper.update()
Example #13
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]
    optparser = OptionParser()
    add_verbosity_options(optparser)
    opts, args = optparser.parse_args(argv)
    setup_logging_from_opts(opts, logger)
    EventsCalendarScraper().update()
Example #14
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]
    optparser = OptionParser()
    add_verbosity_options(optparser)
    opts, args = optparser.parse_args(argv)
    setup_logging_from_opts(opts, logger)
    ObituaryScraper().update()
Example #15
def main():
    parser = OptionParser()
    parser.add_option('-c', '--clear', help='Clear schema',
                      action="store_true", dest="clear")
    add_verbosity_options(parser)
    opts, args = parser.parse_args(sys.argv)
    scraper = Scraper(clear=opts.clear)
    setup_logging_from_opts(opts, scraper.logger)
    scraper.update()
Example #16
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]
    optparser = OptionParser()
    add_verbosity_options(optparser)
    opts, args = optparser.parse_args(argv)
    setup_logging_from_opts(opts, logger)
    for item in settings.TWITTER_DEALS_KEYWORDS:
        TwitterScraper().update(item)
Example #17
def main(argv=None):
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    from optparse import OptionParser
    if argv is None:
        argv = sys.argv[1:]
    optparser = OptionParser()
    add_verbosity_options(optparser)
    opts, args = optparser.parse_args(argv)
    setup_logging_from_opts(opts, logger)
    update(args)
Example #18
def main(argv=None):
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    from optparse import OptionParser
    if argv is None:
        argv = sys.argv[1:]
    optparser = OptionParser()
    add_verbosity_options(optparser)
    opts, args = optparser.parse_args(argv)
    setup_logging_from_opts(opts, logger)
    update()
Example #19
def main():
    parser = OptionParser()
    parser.add_option('-c', '--clear', help='Clear schema',
                      action="store_true", dest="clear")
    add_verbosity_options(parser)
    opts, args = parser.parse_args(sys.argv)
    setup_logging_from_opts(opts, logger)
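    # parse_args was given the full sys.argv, so args[0] is the script name
    # and the CSV filename is args[1].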
    if len(args) != 2:
        parser.error("Please specify a CSV file to import")
    filename = args[1]
    RestaurantInspections(clear=opts.clear).update(filename)
Example #20
def main(argv=None):
    import sys
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    from optparse import OptionParser
    if argv is None:
        argv = sys.argv[1:]
    optparser = OptionParser()
    add_verbosity_options(optparser)
    scraper = TruliaRealEstateScraper()
    opts, args = optparser.parse_args(argv)
    setup_logging_from_opts(opts, scraper.logger)
    scraper.update()
Example #21
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]
    optparser = OptionParser()
    add_verbosity_options(optparser)
    opts, args = optparser.parse_args(argv)
    setup_logging_from_opts(opts, logger)
    for item in settings.YOUTUBE_KEYWORDS:
        YouTubeScraper().update(item, '1')
        YouTubeScraper().update(item, '51')
        YouTubeScraper().update(item, '101')
        YouTubeScraper().update(item, '151')
Example #22
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

    from optparse import OptionParser
    usage = "usage: %prog [options] <api url>"
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-k",
        "--api-key",
        help='GeoReport V2 API key',
        action='store',
    )
    parser.add_option(
        "--html-url-template",
        help='template for creating html urls for items based on their identifiers, eg http://example.com/{id}.html',
        action='store')
    parser.add_option("--days-prior",
                      help='how many days ago to start scraping',
                      type="int",
                      default=90)
    parser.add_option("--schema",
                      help="slug of news item type to use",
                      default="open311-service-requests")
    parser.add_option(
        "--http-cache",
        help='location to use as an http cache.  If a cached value is seen, no update is performed.',
        action='store')
    parser.add_option("--jurisdiction-id",
                      help='jurisdiction identifier to provide to api',
                      action='store')

    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    add_verbosity_options(parser)

    options, args = parser.parse_args(argv)
    setup_logging_from_opts(options, log)

    if len(args) < 1:
        parser.print_usage()
        return 1

    scraper = GeoReportV2Scraper(api_url=args[0],
                                 api_key=options.api_key,
                                 jurisdiction_id=options.jurisdiction_id,
                                 schema_slug=options.schema,
                                 days_prior=options.days_prior,
                                 http_cache=options.http_cache,
                                 html_url_template=options.html_url_template)
    scraper.update()
    return 0
Example #23
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

    from optparse import OptionParser
    usage = "usage: %prog [options] <api url>"
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-k", "--api-key", help='GeoReport V2 API key', action='store',
        )
    parser.add_option(
        "--html-url-template",
        help='template for creating html urls for items based on their identifiers, eg http://example.com/{id}.html',
        action='store'
        )
    parser.add_option(
        "--days-prior", help='how many days ago to start scraping', type="int",
        default=90
        )
    parser.add_option(
        "--schema", help="which news item type to create when scraping",
        default="open311-service-requests"
        )
    parser.add_option(
        "--http-cache", help='location to use as an http cache.  If a cached value is seen, no update is performed.', 
        action='store'
        )
    parser.add_option(
        "--jurisdiction-id", help='jurisdiction identifier to provide to api',
        action='store'
        )

    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    add_verbosity_options(parser)

    options, args = parser.parse_args(argv)
    setup_logging_from_opts(options, log)

    if len(args) < 1:
        parser.print_usage()
        return 1
    
    scraper = GeoReportV2Scraper(api_url=args[0], api_key=options.api_key,
                                 jurisdiction_id=options.jurisdiction_id,
                                 schema_slug=options.schema,
                                 days_prior=options.days_prior,
                                 http_cache=options.http_cache,
                                 html_url_template=options.html_url_template)
    scraper.update()
    return 0
Example #24
def main(argv=None):
    argv = argv or sys.argv[1:]
    from optparse import OptionParser
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    optparser = OptionParser()
    add_verbosity_options(optparser)
    opts, args = optparser.parse_args(argv)
    # This sets up the root logger & handlers as per other scrapers.
    import ebdata.retrieval.log
    setup_logging_from_opts(opts, logger=logger)
    if args:
        url = args[0]
    else:
        url = 'http://report.boonecountymo.org/mrcjava/mrcclasses/SH01_MP/cadlogs.xml'
    return update(url)
Example #25
def main(argv=None):
    argv = argv or sys.argv[1:]
    from optparse import OptionParser
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    optparser = OptionParser()
    add_verbosity_options(optparser)
    opts, args = optparser.parse_args(argv)
    # This sets up the root logger & handlers as per other scrapers.
    import ebdata.retrieval.log
    setup_logging_from_opts(opts, logger=logger)
    if args:
        url = args[0]
    else:
        url = 'http://www.gocolumbiamo.com/PSJC/Services/911/911dispatch/police_georss.php'
    return update(url)
Example #26
def main(argv=None):
    import sys
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    from optparse import OptionParser
    if argv is None:
        argv = sys.argv[1:]
    optparser = OptionParser()
    optparser.add_option(
        '-d', '--days',
        help="How many days ago to start searching. Default is 10. -1 means load everything.",
        action="store", default=10, type="int",
        )
    add_verbosity_options(optparser)
    opts, args = optparser.parse_args(argv)
    scraper = MidMoReviewsScraper(options=opts)
    setup_logging_from_opts(opts, scraper.logger)
    scraper.update()
Example #27
def main(argv=None):
    import sys
    if argv is None:
        argv = sys.argv[1:]
    from optparse import OptionParser
    usage = "usage: %prog [options] <spreadsheet> [<mapping spreadsheet>]"
    usage += "\n\nSpreadsheet arguments can be local files or URLs."
    usage += "\n\nSee http://openblockproject.org/docs/packages/ebdata.html#spreadsheets-scrapers-general-spreadsheet for more."
    parser = OptionParser(usage=usage)

    parser.add_option("--schema",
                      help="slug of news item type to create when scraping",
                      default="local-news")

    parser.add_option(
        "--unique-fields",
        help="Which NewsItem fields identify a unique record in this data source. Comma-separated, eg. --unique-fields='url,location_name,title'",
        action="store",
        default=None)

    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    add_verbosity_options(parser)

    options, args = parser.parse_args(argv)
    if len(args) >= 1:
        items_sheet = args[0]
        if len(args) >= 2:
            map_sheet = args[1]
        else:
            map_sheet = None
    else:
        parser.print_usage()
        sys.exit(0)

    if options.unique_fields:
        unique_fields = [s.strip() for s in options.unique_fields.split(',')]
    else:
        unique_fields = []
    scraper = SpreadsheetScraper(items_sheet,
                                 map_sheet,
                                 schema_slug=options.schema,
                                 unique_fields=unique_fields)
    setup_logging_from_opts(options, scraper.logger)
    scraper.update()
Example #28
def main(argv=None):
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    import sys
    if argv is None:
        argv = sys.argv[1:]
    from optparse import OptionParser
    parser = OptionParser()
    add_verbosity_options(parser)
    parser.add_option('-n', '--name-start', help='Name of first restaurant to start with.'
                      ' This is useful if you\'ve run the scraper and it\'s broken '
                      'several hours into it; you can pick up around where it left off.')

    options, args = parser.parse_args(argv)

    scraper = RestaurantScraper(name_start=options.name_start)
    setup_logging_from_opts(options, scraper.logger)

    scraper.update()
Example #29
def main(argv=None):
    argv = argv or sys.argv[1:]
    from optparse import OptionParser
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    optparser = OptionParser()
    optparser.add_option(
        '-d', '--days',
        help="How many days ago to start searching. Default is 30. -1 means load everything.",
        action="store", default=30, type="int",
        )
    add_verbosity_options(optparser)
    opts, args = optparser.parse_args(argv)
    # This sets up the root logger & handlers as per other scrapers.
    import ebdata.retrieval.log
    setup_logging_from_opts(opts, logger=logger)
    xmlpath = fetch_xml()
    xmlfile = open(xmlpath, 'r')
    return update(xmlfile, opts)
Example #30
def main(argv=None):
    import sys
    argv = argv or sys.argv[1:]
    from optparse import OptionParser
    usage = "usage: %prog [options]"
    parser = OptionParser(usage=usage)
    parser.add_option(
        "--days-prior", help='how many days ago to start scraping', type="int",
        default=30
        )
    add_verbosity_options(parser)
    options, args = parser.parse_args(argv)
    # This sets up the root logger & handlers as per other scrapers.
    import ebdata.retrieval.log
    setup_logging_from_opts(options, logger=log)

    scraper = EverythingMidMoBusinessScraper(days_prior=options.days_prior)
    scraper.update()
Example #31
def main(argv=None):
    import sys
    if argv is None:
        argv = sys.argv[1:]
    from optparse import OptionParser
    usage = "usage: %prog [options] <spreadsheet> [<mapping spreadsheet>]"
    usage += "\n\nSpreadsheet arguments can be local files or URLs."
    usage += "\n\nSee http://openblockproject.org/docs/packages/ebdata.html#spreadsheets-scrapers-general-spreadsheet for more."
    parser = OptionParser(usage=usage)

    parser.add_option(
        "--schema", help="slug of news item type to create when scraping",
        default="local-news"
        )

    parser.add_option(
        "--unique-fields", help="Which NewsItem fields identify a unique record in this data source. Comma-separated, eg. --unique-fields='url,location_name,title",
        action="store", default=None
        )

    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    add_verbosity_options(parser)

    options, args = parser.parse_args(argv)
    if len(args) >= 1:
        items_sheet = args[0]
        if len(args) >= 2:
            map_sheet = args[1]
        else:
            map_sheet = None
    else:
        parser.print_usage()
        sys.exit(0)

    if options.unique_fields:
        unique_fields = [s.strip() for s in options.unique_fields.split(',')]
    else:
        unique_fields = []
    scraper = SpreadsheetScraper(items_sheet, map_sheet,
                                 schema_slug=options.schema,
                                 unique_fields=unique_fields)
    setup_logging_from_opts(options, scraper.logger)
    scraper.update()
Example #32
def main(argv=None):
    import sys
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    from optparse import OptionParser
    if argv is None:
        argv = sys.argv[1:]
    optparser = OptionParser()
    optparser.add_option('-s', '--start-date',
                         help='Date to start scraping, in YYYY/MM/DD format. If not passed, default is 7 days ago.'
                         )
    add_verbosity_options(optparser)
    opts, args = optparser.parse_args(argv)
    if opts.start_date:
        from ebpub.utils.dates import parse_date
        start_date = parse_date(opts.start_date, '%Y/%m/%d')
    else:
        start_date = None
    scraper = Scraper(start_date=start_date)
    setup_logging_from_opts(opts, scraper.logger)
    scraper.update()
Example #33
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

    from optparse import OptionParser
    usage = "usage: %prog [options]"
    parser = OptionParser(usage=usage)
    parser.add_option(
        "--days-prior", help='how many days ago to start scraping', type="int",
        default=90
        )
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    add_verbosity_options(parser)

    options, args = parser.parse_args(argv)
    setup_logging_from_opts(options, logger)

    scraper = ColumbiaRestaurantInspScraper()
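    # Note: the --days-prior option is parsed above but never passed to the scraper.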
    scraper.update()
    return 0
Example #34
def main(argv=None):
    import sys
    if argv is None:
        argv = sys.argv[1:]
    from optparse import OptionParser
    usage = "usage: %prog [options] <spreadsheet> [<config spreadsheet>]"
    parser = OptionParser(usage=usage)

    parser.add_option(
        "--schema", help="which news item type to create when scraping",
        default="local-news"
        )

    parser.add_option(
        "--unique-fields", help="Which NewsItem fields identify a unique record in this data source. Comma-separated, eg. --unique-fields='url,location_name,title",
        action="store", default=None
        )

    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    add_verbosity_options(parser)

    options, args = parser.parse_args(argv)
    if len(args) >= 1:
        items_sheet = args[0]
        if len(args) >= 2:
            map_sheet = args[1]
        else:
            map_sheet = None
    else:
        parser.print_usage()
        sys.exit(0)

    if options.unique_fields:
        unique_fields = [s.strip() for s in options.unique_fields.split(',')]
    else:
        unique_fields = []
    scraper = SpreadsheetScraper(items_sheet, map_sheet,
                                 schema_slug=options.schema,
                                 unique_fields=unique_fields)
    setup_logging_from_opts(options, scraper.logger)
    scraper.update()
Example #35
def main(argv=None):
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    import sys
    if argv is None:
        argv = sys.argv[1:]
    from optparse import OptionParser
    parser = OptionParser()
    add_verbosity_options(parser)
    parser.add_option(
        '-n',
        '--name-start',
        help='Name of first restaurant to start with.'
        ' This is useful if you\'ve run the scraper and it\'s broken '
        'several hours into it; you can pick up around where it left off.')

    options, args = parser.parse_args(argv)

    scraper = RestaurantScraper(name_start=options.name_start)
    setup_logging_from_opts(options, scraper.logger)

    scraper.update()
Example #36
def main():
    parser = OptionParser()
    parser.add_option('-c', '--clear', help='Clear schema',
                      action="store_true", dest="clear")
    parser.add_option('-s', '--stats', help='Report file stats only',
                      action="store_true", dest="stats")
    add_verbosity_options(parser)
    opts, args = parser.parse_args(sys.argv)
    setup_logging_from_opts(opts, logger)
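    # parse_args was given the full sys.argv, so args[0] is the script name;
    # the CSV file and shapefile are args[1] and args[2].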
    if len(args) != 3:
        parser.error("Please specify a CSV file and shapefile to import")
    csv_name, shp_name = args[1], args[2]
    csvreader = csv.DictReader(open(csv_name))
    layer = DataSource(shp_name)[0]

    prop_trans = PropertyTransactions(clear=opts.clear)

    if opts.stats:
        prop_trans.stats(csvreader, layer)
    else:
        prop_trans.update(csvreader, layer)
Example #37
def main(argv=None):
    import sys
    if argv is None:
        argv = sys.argv[1:]
    from optparse import OptionParser
    parser = OptionParser()
    parser.add_option(
        '-d', "--days",
        help="How many days (prior to stop date) to search. Default is 30 days.",
        action='store', default=30, type='int',
    )
    parser.add_option(
        '-e',
        "--end-date",
        help="Stop date for photo search, format YYYY/MM/DD. Default is now.",
        action='store',
        default=None,
    )
    parser.add_option(
        "--schema",
        help="Slug of schema to use. Default is 'photos'.",
        action='store',
        default='photos',
    )

    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    add_verbosity_options(parser)

    options, args = parser.parse_args(argv)
    setup_logging_from_opts(options, logger)
    scraper = FlickrScraper(options)
    scraper.update()
Example #38
        # (fragment: the original listing starts mid-way through a scraper method)
            return
        print location
        kwargs = dict(item_date=date,
                      location=location,
                      location_name=location_name,
                      description=description,
                      title=list_record['title'],
                      url=list_record['link'],
                      )
        attributes = None
        self.create_or_update(old_record, attributes, **kwargs)


if __name__ == "__main__":
    import sys
    from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts
    from optparse import OptionParser
    optparser = OptionParser()
    add_verbosity_options(optparser)
    scraper = KSCScraper()
    # Parse sys.argv[1:] directly so the verbosity options actually take effect
    # (the original left these two lines commented out, with argv undefined).
    opts, args = optparser.parse_args(sys.argv[1:])
    setup_logging_from_opts(opts, scraper.logger)
    # During testing, do this instead:
    # scraper.display_data()
    scraper.update()
Example #39
# Note: the listing is truncated here; the OptionParser construction and the
# leading lines of this first add_option call are reconstructed from context.
from optparse import OptionParser
parser = OptionParser()
parser.add_option('--start-page',
                  help="Page of results to start from. Default is zero.",
                  default=0)
parser.add_option(
    "-n", "--no-wait-for-rate-limit",
    help="If we hit rate limit, exit instead of waiting until it resets (typically 1 hour). Default is to wait.",
    dest="wait_for_rate_limit",
    action='store_false',
    default=True,
)

from ebpub.utils.script_utils import add_verbosity_options, setup_logging_from_opts

add_verbosity_options(parser)
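# The parser and its options live at module level; main() below only parses argv.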


def main(argv=None):
    import sys
    if argv is None:
        argv = sys.argv[1:]
    options, args = parser.parse_args(argv)
    scraper = MeetupScraper(options)
    setup_logging_from_opts(options, scraper.logger)
    scraper.update()


if __name__ == '__main__':
    main()