Ejemplo n.º 1
0
def brozzler_new_job(argv=None):
    '''
    Command line utility entry point for queuing a new brozzler job. Takes a
    yaml brozzler job configuration file, creates job, sites, and pages objects
    in rethinkdb, which brozzler-workers will look at and start crawling.
    '''
    argv = argv or sys.argv
    arg_parser = argparse.ArgumentParser(
        prog=os.path.basename(argv[0]),
        description='brozzler-new-job - queue new job with brozzler',
        formatter_class=BetterArgumentDefaultsHelpFormatter)
    arg_parser.add_argument('job_conf_file',
                            metavar='JOB_CONF_FILE',
                            help='brozzler job configuration file in yaml')
    add_rethinkdb_options(arg_parser)
    add_common_options(arg_parser, argv)

    args = arg_parser.parse_args(args=argv[1:])
    configure_logging(args)

    rr = rethinker(args)
    frontier = brozzler.RethinkDbFrontier(rr)
    try:
        brozzler.new_job_file(frontier, args.job_conf_file)
    except brozzler.InvalidJobConf as e:
        print('brozzler-new-job: invalid job file:',
              args.job_conf_file,
              file=sys.stderr)
        print('  ' + yaml.dump(e.errors).rstrip().replace('\n', '\n  '),
              file=sys.stderr)
        sys.exit(1)
Ejemplo n.º 2
0
def brozzler_new_job(argv=None):
    '''
    Command line utility entry point for queuing a new brozzler job. Takes a
    yaml brozzler job configuration file, creates job, sites, and pages objects
    in rethinkdb, which brozzler-workers will look at and start crawling.
    '''
    argv = argv or sys.argv
    arg_parser = argparse.ArgumentParser(
            prog=os.path.basename(argv[0]),
            description='brozzler-new-job - queue new job with brozzler',
            formatter_class=BetterArgumentDefaultsHelpFormatter)
    arg_parser.add_argument(
            'job_conf_file', metavar='JOB_CONF_FILE',
            help='brozzler job configuration file in yaml')
    add_rethinkdb_options(arg_parser)
    add_common_options(arg_parser, argv)

    args = arg_parser.parse_args(args=argv[1:])
    configure_logging(args)

    rr = rethinker(args)
    frontier = brozzler.RethinkDbFrontier(rr)
    try:
        brozzler.new_job_file(frontier, args.job_conf_file)
    except brozzler.InvalidJobConf as e:
        print('brozzler-new-job: invalid job file:', args.job_conf_file, file=sys.stderr)
        print('  ' + yaml.dump(e.errors).rstrip().replace('\n', '\n  '), file=sys.stderr)
        sys.exit(1)