def brozzler_new_job(argv=None):
    '''
    Entry point for the `brozzler-new-job` command line tool.

    Parses the command line, then passes the yaml job configuration file
    named on it to `brozzler.new_job_file`, which creates the job, sites,
    and pages objects in rethinkdb that brozzler-workers will look at and
    start crawling.

    Args:
        argv: full argument vector, program name first; `sys.argv` is used
            when this is None or empty.

    If `brozzler.InvalidJobConf` is raised, the validation errors are
    printed to stderr as yaml and the process exits with status 1.
    '''
    # NOTE(review): this file appears to define `brozzler_new_job` more than
    # once; if both are at module level the later definition wins. Confirm
    # which copy is intended to stay.
    if not argv:
        argv = sys.argv

    parser = argparse.ArgumentParser(
        prog=os.path.basename(argv[0]),
        description='brozzler-new-job - queue new job with brozzler',
        formatter_class=BetterArgumentDefaultsHelpFormatter)
    parser.add_argument(
        'job_conf_file', metavar='JOB_CONF_FILE',
        help='brozzler job configuration file in yaml')
    add_rethinkdb_options(parser)
    add_common_options(parser, argv)

    args = parser.parse_args(args=argv[1:])
    configure_logging(args)

    frontier = brozzler.RethinkDbFrontier(rethinker(args))
    try:
        brozzler.new_job_file(frontier, args.job_conf_file)
    except brozzler.InvalidJobConf as e:
        print(
            'brozzler-new-job: invalid job file:', args.job_conf_file,
            file=sys.stderr)
        # Indent every line of the yaml error dump beneath the header line.
        indented_errors = yaml.dump(e.errors).rstrip().replace('\n', '\n ')
        print(' ' + indented_errors, file=sys.stderr)
        sys.exit(1)
def brozzler_new_job(argv=None):
    '''
    Command line utility entry point for queuing a new brozzler job.

    Takes a yaml brozzler job configuration file and submits it through
    `brozzler.new_job_file`, creating job, sites, and pages objects in
    rethinkdb for brozzler-workers to look at and start crawling.

    `argv` is the complete argument list (program name at index 0) and
    defaults to `sys.argv` when omitted or empty. An invalid job file is
    reported on stderr and terminates the process with exit status 1.
    '''
    # NOTE(review): an earlier definition with this same name appears in this
    # file; assuming both are module-level, this later one is the binding
    # callers get. Confirm the duplicate is intentional.
    argv = sys.argv if not argv else argv
    program_name = os.path.basename(argv[0])

    cli = argparse.ArgumentParser(
        prog=program_name,
        description='brozzler-new-job - queue new job with brozzler',
        formatter_class=BetterArgumentDefaultsHelpFormatter)
    cli.add_argument(
        'job_conf_file',
        metavar='JOB_CONF_FILE',
        help='brozzler job configuration file in yaml')
    # Shared option groups are registered before parsing.
    add_rethinkdb_options(cli)
    add_common_options(cli, argv)
    opts = cli.parse_args(args=argv[1:])

    configure_logging(opts)
    rethink = rethinker(opts)
    job_frontier = brozzler.RethinkDbFrontier(rethink)

    try:
        brozzler.new_job_file(job_frontier, opts.job_conf_file)
    except brozzler.InvalidJobConf as conf_error:
        errors_yaml = yaml.dump(conf_error.errors).rstrip()
        print(
            'brozzler-new-job: invalid job file:',
            opts.job_conf_file,
            file=sys.stderr)
        # Continuation lines of the dump get the same leading indent as
        # the first line.
        print(' ' + errors_yaml.replace('\n', '\n '), file=sys.stderr)
        sys.exit(1)