Example 1
def main():

    parser = argparse.ArgumentParser(
        description='Scrape data for single bill, saving data to disk.',
        parents=[base_arg_parser],
    )

    parser.add_argument('module', type=str, help='scraper module (eg. nc)')
    parser.add_argument('chamber', type=str, help='chamber for bill to scrape')
    parser.add_argument('session', type=str, help='session for bill to scrape')
    parser.add_argument('bill_id', type=str, help='bill_id to scrape')

    parser.add_argument('--strict',
                        action='store_true',
                        dest='strict',
                        default=False,
                        help="fail immediately when"
                        "encountering validation warning")
    parser.add_argument('-n',
                        '--no_cache',
                        action='store_true',
                        dest='no_cache',
                        help="don't use web page cache")
    parser.add_argument('--fastmode',
                        help="scrape in fast mode",
                        action="store_true",
                        default=False)
    parser.add_argument('-r',
                        '--rpm',
                        action='store',
                        type=int,
                        dest='rpm',
                        default=60)
    parser.add_argument('--import',
                        dest='do_import',
                        help="import bill after scrape",
                        action="store_true",
                        default=False)

    args = parser.parse_args()

    settings.update(args)

    # set up search path
    sys.path.insert(
        0, os.path.join(os.path.dirname(__file__), '../../openstates'))

    # get metadata
    metadata = __import__(args.module, fromlist=['metadata']).metadata
    abbr = metadata['abbreviation']

    # configure logger
    configure_logging(args.verbose, abbr)

    args.output_dir = os.path.join(settings.BILLY_DATA_DIR, abbr)

    _run_scraper(args, metadata)

    if args.do_import:
        import_bills(abbr, settings.BILLY_DATA_DIR)
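
Note on the dynamic import above: `__import__(args.module, fromlist=['metadata'])` returns the scraper module itself so its `metadata` dict can be read. A minimal standalone sketch of the same lookup using `importlib` follows; the module name 'nc' and the relative path are taken from the example and are illustrative only.

import importlib
import os
import sys

# Assumption: scraper packages live under ../../openstates relative to this
# file, as in the sys.path.insert call above; 'nc' is a hypothetical module.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../openstates'))

module = importlib.import_module('nc')  # same effect as __import__('nc', fromlist=['metadata'])
metadata = module.metadata              # each scraper module exposes a metadata dict
abbr = metadata['abbreviation']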
Example 2
def main():
    parser = argparse.ArgumentParser(
        description='Import scraped data into database.',
        parents=[base_arg_parser],
    )

    parser.add_argument('abbreviation', type=str,
                        help=('the short name of the data to import'))
    parser.add_argument('-r', '--rpm', type=int, default=60,
                        help=('maximum number of documents to download '
                              'per minute'))
    parser.add_argument('--bills', action='store_true',
                        help='scrape bill data')
    parser.add_argument('--legislators', action='store_true',
                        help='scrape legislator data')
    parser.add_argument('--committees', action='store_true',
                        help='scrape (separate) committee data')
    parser.add_argument('--events', action='store_true',
                        help='scrape event data')
    parser.add_argument('--alldata', action='store_true', dest='alldata',
                        default=False, help="import all available data")

    args = parser.parse_args()

    if not (args.bills or args.legislators or args.committees or
            args.events or args.alldata):
        raise Exception("Must specify at least one type: --bills, "
                           "--legislators, --committees, --events, "
                           "--alldata")

    settings.update(args)

    data_dir = settings.BILLY_DATA_DIR

    # configure logger
    configure_logging(args.verbose, args.abbreviation)

    # always import metadata
    import_metadata(args.abbreviation, data_dir)

    if args.legislators or args.alldata:
        import_legislators(args.abbreviation, data_dir)
    if args.bills or args.alldata:
        import_bills(args.abbreviation, data_dir)
    if args.committees or args.alldata:
        import_committees(args.abbreviation, data_dir)

    # events currently excluded from --alldata
    if args.events:
        import_events(args.abbreviation, data_dir)
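
All of these entry points pass `parents=[base_arg_parser]` so that shared flags are declared once. A minimal sketch of that pattern is below; the `--verbose` counter is an assumption inferred from the `args.verbose` checks elsewhere in these examples, not the project's actual parser.

import argparse

# Shared options go on a parent parser; add_help=False avoids a duplicate -h.
base_arg_parser = argparse.ArgumentParser(add_help=False)
base_arg_parser.add_argument('-v', '--verbose', action='count', default=0,
                             help='be verbose (repeat for more detail)')

parser = argparse.ArgumentParser(
    description='Import scraped data into database.',
    parents=[base_arg_parser],
)
parser.add_argument('abbreviation', type=str,
                    help='the short name of the data to import')

args = parser.parse_args(['nc', '-vv'])
print(args.abbreviation, args.verbose)  # -> nc 2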
Example 3
def main():

    parser = argparse.ArgumentParser(
        description='Scrape data for state, saving data to disk.',
        parents=[base_arg_parser],
    )

    parser.add_argument('state', type=str,
                        help='state scraper module (eg. nc)')
    parser.add_argument('chamber', type=str,
                        help='chamber for bill to scrape')
    parser.add_argument('session', type=str,
                        help='session for bill to scrape')
    parser.add_argument('bill_id', type=str,
                        help='bill_id to scrape')
    parser.add_argument('--strict', action='store_true', dest='strict',
                        default=False, help="fail immediately when"
                        "encountering validation warning")
    parser.add_argument('-n', '--no_cache', action='store_true',
                        dest='no_cache', help="don't use web page cache")
    parser.add_argument('--fastmode', help="scrape in fast mode",
                        action="store_true", default=False)
    parser.add_argument('-r', '--rpm', action='store', type=int, dest='rpm',
                        default=60)
    parser.add_argument('--import', dest='do_import',
                        help="import bill after scrape",
                        action="store_true", default=False)

    args = parser.parse_args()

    settings.update(args)

    # set up search path
    sys.path.insert(0, os.path.join(os.path.dirname(__file__),
                                    '../../openstates'))

    # get metadata
    metadata = __import__(args.state, fromlist=['metadata']).metadata
    state = metadata['abbreviation']

    # configure logger
    configure_logging(args.verbose, state)

    args.output_dir = os.path.join(settings.BILLY_DATA_DIR, args.state)

    _run_scraper(args.state, state, args, metadata)

    if args.do_import:
        import_bills(args.state, settings.BILLY_DATA_DIR)
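
The `sys.path.insert` calls above push a relative `../../openstates` path onto the import path. A purely illustrative variant that normalises it to an absolute path first (not the project's own code):

import os
import sys

# Resolve ../../openstates relative to this file before putting it on sys.path,
# so imports do not depend on the process's current working directory.
openstates_dir = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..', '..', 'openstates'))
sys.path.insert(0, openstates_dir)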
Example 4
def main():
    try:
        parser = argparse.ArgumentParser(
            description="Scrape data for single bill, saving data to disk.", parents=[base_arg_parser]
        )

        parser.add_argument("module", type=str, help="scraper module (eg. nc)")
        parser.add_argument("chamber", type=str, help="chamber for bill to scrape")
        parser.add_argument("session", type=str, help="session for bill to scrape")
        parser.add_argument("bill_id", type=str, help="bill_id to scrape")

        parser.add_argument(
            "--strict",
            action="store_true",
            dest="strict",
            default=False,
            help="fail immediately when" "encountering validation warning",
        )
        parser.add_argument("-n", "--no_cache", action="store_true", dest="no_cache", help="don't use web page cache")
        parser.add_argument("--fastmode", help="scrape in fast mode", action="store_true", default=False)
        parser.add_argument("-r", "--rpm", action="store", type=int, dest="rpm", default=60),
        parser.add_argument(
            "--import", dest="do_import", help="import bill after scrape", action="store_true", default=False
        )

        args = parser.parse_args()

        settings.update(args)

        # set up search path
        sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../openstates"))

        # get metadata
        metadata = __import__(args.module, fromlist=["metadata"]).metadata
        abbr = metadata["abbreviation"]

        # configure logger
        configure_logging(args.verbose, abbr)

        args.output_dir = os.path.join(settings.BILLY_DATA_DIR, abbr)

        _run_scraper(args, metadata)

        if args.do_import:
            import_bills(abbr, settings.BILLY_DATA_DIR)
    except ScrapeError as e:
        print "Error:", e
        sys.exit(1)
Example 5
def _do_imports(abbrev, args):
    # do imports here so that scrape doesn't depend on mongo
    from billy.importers.metadata import import_metadata
    from billy.importers.bills import import_bills
    from billy.importers.legislators import import_legislators
    from billy.importers.committees import import_committees
    from billy.importers.events import import_events
    from billy.importers.speeches import import_speeches

    # always import metadata
    import_metadata(abbrev)
    report = {}

    if 'legislators' in args.types:
        report['legislators'] = \
            import_legislators(abbrev, settings.BILLY_DATA_DIR)

    if 'bills' in args.types:
        report['bills'] = import_bills(abbrev, settings.BILLY_DATA_DIR)

    if 'committees' in args.types:
        report['committees'] = \
            import_committees(abbrev, settings.BILLY_DATA_DIR)

    if 'events' in args.types or 'speeches' in args.types:
        report['events'] = import_events(abbrev, settings.BILLY_DATA_DIR)

    if 'speeches' in args.types:
        report['speeches'] = import_speeches(abbrev, settings.BILLY_DATA_DIR)

    return report
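
The chain of `if '<type>' in args.types` checks in `_do_imports` can also be written as a dispatch table. The sketch below reuses the importer functions named above but is an illustration, not billy's own code; the events/speeches coupling would still need its special case.

def _do_imports_table(abbrev, types, data_dir):
    # Deferred imports, as above, so scraping does not depend on mongo.
    from billy.importers.bills import import_bills
    from billy.importers.legislators import import_legislators
    from billy.importers.committees import import_committees
    from billy.importers.speeches import import_speeches

    importers = {
        'legislators': import_legislators,
        'bills': import_bills,
        'committees': import_committees,
        'speeches': import_speeches,
    }

    report = {}
    for type_, importer in importers.items():
        if type_ in types:
            report[type_] = importer(abbrev, data_dir)
    return report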
Example 6
def main():
    try:
        parser = argparse.ArgumentParser(
            description='Scrape data for single bill, saving data to disk.',
            parents=[base_arg_parser],
        )

        parser.add_argument('module', type=str, help='scraper module (eg. nc)')
        parser.add_argument('chamber', type=str,
                            help='chamber for bill to scrape')
        parser.add_argument('session', type=str,
                            help='session for bill to scrape')
        parser.add_argument('bill_id', type=str, help='bill_id to scrape')

        parser.add_argument('--strict', action='store_true', dest='strict',
                            default=False, help="fail immediately when"
                            "encountering validation warning")
        parser.add_argument('-n', '--no_cache', action='store_true',
                            dest='no_cache', help="don't use web page cache")
        parser.add_argument('--fastmode', help="scrape in fast mode",
                            action="store_true", default=False)
        parser.add_argument('-r', '--rpm', action='store', type=int,
                            dest='rpm', default=60)
        parser.add_argument('--import', dest='do_import',
                            help="import bill after scrape",
                            action="store_true", default=False)

        args = parser.parse_args()

        settings.update(args)

        # get metadata
        metadata = __import__(args.module, fromlist=['metadata']).metadata
        abbr = metadata['abbreviation']

        # configure logger
        configure_logging(args.verbose, abbr)

        args.output_dir = os.path.join(settings.BILLY_DATA_DIR, abbr)

        _run_scraper(args, metadata)

        if args.do_import:
            import_bills(abbr, settings.BILLY_DATA_DIR)
    except ScrapeError as e:
        print('Error:', e)
        sys.exit(1)
Example 7
def _do_imports(abbrev, args):
    # do imports here so that scrape doesn't depend on mongo
    from billy.importers.metadata import import_metadata
    from billy.importers.bills import import_bills
    from billy.importers.legislators import import_legislators
    from billy.importers.committees import import_committees
    from billy.importers.events import import_events
    from billy.importers.speeches import import_speeches

    # always import metadata and districts
    import_metadata(abbrev)

    dist_filename = os.path.join(settings.BILLY_MANUAL_DATA_DIR, 'districts',
                                 '%s.csv' % abbrev)
    if os.path.exists(dist_filename):
        db.districts.remove({'abbr': abbrev})
        dist_csv = unicodecsv.DictReader(open(dist_filename))
        for dist in dist_csv:
            dist['_id'] = '%(abbr)s-%(chamber)s-%(name)s' % dist
            dist['boundary_id'] = dist['boundary_id'] % dist
            dist['num_seats'] = int(dist['num_seats'])
            _log.debug(dist)
            db.districts.save(dist, safe=True)
    else:
        logging.getLogger('billy').warning("%s not found, continuing without "
                                           "districts" % dist_filename)

    report = {}

    if 'legislators' in args.types:
        report['legislators'] = import_legislators(
            abbrev,
            settings.BILLY_DATA_DIR
        )

    if 'bills' in args.types:
        report['bills'] = import_bills(abbrev, settings.BILLY_DATA_DIR)

    if 'committees' in args.types:
        report['committees'] = import_committees(
            abbrev,
            settings.BILLY_DATA_DIR
        )

    if 'events' in args.types or 'speeches' in args.types:
        report['events'] = import_events(abbrev, settings.BILLY_DATA_DIR)

    if 'speeches' in args.types:
        report['speeches'] = import_speeches(abbrev, settings.BILLY_DATA_DIR)

    return report
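
The district rows above build `_id` and fill `boundary_id` with dict-based `%` formatting against the CSV row itself. A small standalone illustration with made-up values:

# Hypothetical row, shaped like one line of the districts CSV used above.
dist = {
    'abbr': 'nc',
    'chamber': 'lower',
    'name': '1',
    'boundary_id': 'sldl-%(abbr)s-district-%(name)s',
    'num_seats': '1',
}

dist['_id'] = '%(abbr)s-%(chamber)s-%(name)s' % dist  # -> 'nc-lower-1'
dist['boundary_id'] = dist['boundary_id'] % dist      # template filled from the same row
dist['num_seats'] = int(dist['num_seats'])            # CSV fields arrive as strings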
Example 8
def _do_imports(abbrev, args):
    # do imports here so that scrape doesn't depend on mongo
    from billy.importers.metadata import import_metadata
    from billy.importers.bills import import_bills
    from billy.importers.legislators import import_legislators
    from billy.importers.committees import import_committees
    from billy.importers.events import import_events
    from billy.importers.speeches import import_speeches

    # always import metadata and districts
    import_metadata(abbrev)

    dist_filename = os.path.join(settings.BILLY_MANUAL_DATA_DIR, 'districts',
                                 '%s.csv' % abbrev)
    if os.path.exists(dist_filename):
        db.districts.remove({'abbr': abbrev})
        dist_csv = unicodecsv.DictReader(open(dist_filename))
        for dist in dist_csv:
            dist['_id'] = '%(abbr)s-%(chamber)s-%(name)s' % dist
            dist['boundary_id'] = dist['boundary_id'] % dist
            dist['num_seats'] = int(dist['num_seats'])
            db.districts.save(dist, safe=True)
    else:
        logging.getLogger('billy').warning("%s not found, continuing without "
                                           "districts" % dist_filename)

    report = {}

    if 'legislators' in args.types:
        report['legislators'] = \
            import_legislators(abbrev, settings.BILLY_DATA_DIR)

    if 'bills' in args.types:
        report['bills'] = import_bills(abbrev, settings.BILLY_DATA_DIR)

    if 'committees' in args.types:
        report['committees'] = \
            import_committees(abbrev, settings.BILLY_DATA_DIR)

    if 'events' in args.types or 'speeches' in args.types:
        report['events'] = import_events(abbrev, settings.BILLY_DATA_DIR)

    if 'speeches' in args.types:
        report['speeches'] = import_speeches(abbrev, settings.BILLY_DATA_DIR)

    return report
Example 9
    # configure logger
    if args.verbose == 0:
        verbosity = logging.WARNING
    elif args.verbose == 1:
        verbosity = logging.INFO
    else:
        verbosity = logging.DEBUG

    logging.basicConfig(level=verbosity,
                        format="%(asctime)s %(name)s %(levelname)s %(message)s",
                        datefmt="%H:%M:%S")

    # always import metadata
    import_metadata(args.state, data_dir)

    if args.legislators or args.alldata:
        import_legislators(args.state, data_dir)
    if args.bills or args.alldata:
        import_bills(args.state, data_dir)
    if args.committees or args.alldata:
        import_committees(args.state, data_dir)
    if args.votes or args.alldata:
        import_votes(args.state, data_dir)

    # events and versions currently excluded from --alldata
    if args.events:
        import_events(args.state, data_dir)
    if args.versions:
        import_versions(args.state, args.rpm)
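
The verbosity mapping above is presumably what the shared `configure_logging` helper used in the other examples wraps. A minimal sketch of such a helper follows; the body is an assumption, not billy's actual implementation.

import logging

def configure_logging(verbose, abbr=None):
    # Map the repeated -v count onto standard logging levels (sketch only).
    if verbose == 0:
        level = logging.WARNING
    elif verbose == 1:
        level = logging.INFO
    else:
        level = logging.DEBUG

    # abbr is accepted only to mirror the call sites above; unused here.
    logging.basicConfig(level=level,
                        format="%(asctime)s %(name)s %(levelname)s %(message)s",
                        datefmt="%H:%M:%S")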
Example 10
def main():
    parser = argparse.ArgumentParser(
        description='Import scraped data into database.',
        parents=[base_arg_parser],
    )

    parser.add_argument('abbreviation',
                        type=str,
                        help=('the short name of the data to import'))
    parser.add_argument('-r',
                        '--rpm',
                        type=int,
                        default=60,
                        help=('maximum number of documents to download '
                              'per minute'))
    parser.add_argument('--bills',
                        action='store_true',
                        help='scrape bill data')
    parser.add_argument('--legislators',
                        action='store_true',
                        help='scrape legislator data')
    parser.add_argument('--committees',
                        action='store_true',
                        help='scrape (separate) committee data')
    parser.add_argument('--events',
                        action='store_true',
                        help='scrape event data')
    parser.add_argument('--alldata',
                        action='store_true',
                        dest='alldata',
                        default=False,
                        help="import all available data")

    args = parser.parse_args()

    if not (args.bills or args.legislators or args.committees or args.events
            or args.alldata):
        raise Exception("Must specify at least one type: --bills, "
                        "--legislators, --committees, --events, "
                        "--alldata")

    settings.update(args)

    data_dir = settings.BILLY_DATA_DIR

    # configure logger
    configure_logging(args.verbose, args.abbreviation)

    # always import metadata
    import_metadata(args.abbreviation, data_dir)

    if args.legislators or args.alldata:
        import_legislators(args.abbreviation, data_dir)
    if args.bills or args.alldata:
        import_bills(args.abbreviation, data_dir)
    if args.committees or args.alldata:
        import_committees(args.abbreviation, data_dir)

    # events currently excluded from --alldata
    if args.events:
        import_events(args.abbreviation, data_dir)
Example 11
                        default=False, help="import all available data")

    args = parser.parse_args()

    if not (args.bills or args.legislators or args.committees or
            args.events or args.alldata):
        raise Exception("Must specify at least one type: --bills, "
                           "--legislators, --committees, --events, "
                           "--alldata")

    settings.update(args)

    data_dir = settings.BILLY_DATA_DIR

    # configure logger
    configure_logging(args.verbose, args.abbreviation)

    # always import metadata
    import_metadata(args.abbreviation, data_dir)

    if args.legislators or args.alldata:
        import_legislators(args.abbreviation, data_dir)
    if args.bills or args.alldata:
        import_bills(args.abbreviation, data_dir)
    if args.committees or args.alldata:
        import_committees(args.abbreviation, data_dir)

    # events currently excluded from --alldata
    if args.events:
        import_events(args.abbreviation, data_dir)
Example 12
    args = parser.parse_args()

    if not (args.bills or args.legislators or args.committees or args.events
            or args.versions or args.alldata):
        raise Exception("Must specify at least one type: --bills, "
                        "--legislators, --committees, --events, "
                        "--versions,  --alldata")

    settings.update(args)

    data_dir = settings.BILLY_DATA_DIR

    # configure logger
    configure_logging(args.verbose, args.state)

    # always import metadata
    import_metadata(args.state, data_dir)

    if args.legislators or args.alldata:
        import_legislators(args.state, data_dir)
    if args.bills or args.alldata:
        import_bills(args.state, data_dir)
    if args.committees or args.alldata:
        import_committees(args.state, data_dir)

    # events and versions currently excluded from --alldata
    if args.events:
        import_events(args.state, data_dir)
    if args.versions:
        import_versions(args.state, args.rpm)
Example 13
                        help="import all available data")

    args = parser.parse_args()

    if not (args.bills or args.legislators or args.committees or args.events
            or args.alldata):
        raise Exception("Must specify at least one type: --bills, "
                        "--legislators, --committees, --events, "
                        "--alldata")

    settings.update(args)

    data_dir = settings.BILLY_DATA_DIR

    # configure logger
    configure_logging(args.verbose, args.abbreviation)

    # always import metadata
    import_metadata(args.abbreviation, data_dir)

    if args.legislators or args.alldata:
        import_legislators(args.abbreviation, data_dir)
    if args.bills or args.alldata:
        import_bills(args.abbreviation, data_dir)
    if args.committees or args.alldata:
        import_committees(args.abbreviation, data_dir)

    # events currently excluded from --alldata
    if args.events:
        import_events(args.abbreviation, data_dir)
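
The "must specify at least one type" guard that several of these examples raise as a bare `Exception` can also be handled by argparse itself. A hedged sketch using `parser.error`, which prints the usage string and exits with status 2:

import argparse

parser = argparse.ArgumentParser(description='Import scraped data into database.')
parser.add_argument('abbreviation', type=str,
                    help='the short name of the data to import')
for flag in ('--bills', '--legislators', '--committees', '--events', '--alldata'):
    parser.add_argument(flag, action='store_true')

args = parser.parse_args()

if not (args.bills or args.legislators or args.committees or
        args.events or args.alldata):
    parser.error('specify at least one of --bills, --legislators, '
                 '--committees, --events, --alldata')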