Example #1
0
def test_auth():
    """Check that credentials passed to ``Hapy`` end up on its ``auth``.

    The client's ``auth.username`` / ``auth.password`` are compared with those
    of a ``requests.auth.HTTPDigestAuth`` built from the same credentials.
    """
    # Use a single pair of credentials for both objects; comparing against a
    # differently-valued placeholder could never succeed.
    username = 'username'
    password = 'password'
    h = hapy.Hapy(BASE_URL, username=username, password=password)
    expected = requests.auth.HTTPDigestAuth(username, password)
    assert_equals(expected.username, h.auth.username)
    assert_equals(expected.password, h.auth.password)
Example #2
0
def test_supply_timeout(mock_requests):
    """Check that a ``timeout`` given to ``Hapy`` is passed to ``post``.

    ``mock_requests`` stands in for the ``requests`` module; its ``post`` is
    primed with a 303 response before ``build_job`` is called.
    """
    client = hapy.Hapy(BASE_URL, timeout=0.005)

    # Canned response returned by the mocked POST.
    response = Mock(status_code=303, request=Mock())
    mock_requests.post.return_value = response

    job_name = 'test_build_job'
    client.build_job(job_name)

    expected_url = 'https://localhost:8443/engine/job/%s' % job_name
    expected_kwargs = {
        'url': expected_url,
        'data': {'action': 'build'},
        'auth': None,
        'verify': False,
        'headers': {'accept': 'application/xml'},
        'allow_redirects': False,
        'timeout': 0.005,
    }
    mock_requests.post.assert_called_with(**expected_kwargs)
Example #3
0
def restart_job(frequency, start=None):
    """Restart the crawl job for a particular frequency.

    Fetches the export for ``frequency`` from w3act, keeps the Targets whose
    crawl window contains ``start``, stops any job of that name that reports
    a non-empty status, then starts a new job (or, when ``args.test`` is set,
    only logs what would be started).

    Any ``Exception`` raised along the way is logged and swallowed, so the
    function never propagates ordinary errors to its caller.

    Args:
        frequency: Name of the crawl frequency; also used as the job name.
        start: Reference time used for the crawl-window filter. When omitted,
            the current UTC time is taken at call time.
    """
    # A default of ``datetime.utcnow()`` in the signature would be evaluated
    # only once, at definition time; resolve it per call instead.
    if start is None:
        start = datetime.utcnow()

    logger.info("Restarting %s at %s" % (frequency, start))

    def _in_crawl_window(target):
        """Return True when ``start`` lies inside the target's crawl window."""
        begins = target["crawlStartDateISO"]
        ends = target["crawlEndDateISO"]
        # A missing bound means the window is open on that side.
        started = begins is None or dateutil.parser.parse(begins) < start
        not_ended = ends is None or dateutil.parser.parse(ends) > start
        return started and not_ended

    try:
        w = w3act(args.w3act_url, args.w3act_user, args.w3act_pw)

        export = w.get_ld_export(frequency)
        logger.debug("Found %s Targets in export." % len(export))
        targets = [t for t in export if _in_crawl_window(t)]
        logger.debug("Found %s Targets in date range." % len(targets))

        h = hapy.Hapy("https://%s:%s" % (args.host, args.port),
                      username=args.user,
                      password=args.password)
        # NOTE(review): listjobs()/status() are assumed to exist on this
        # client object - confirm against the hapy API in use.
        if frequency in h.listjobs() and h.status(frequency) != "":
            stop_running_job(frequency, h)
            # TODO: Automated QA
        job = W3actJob(targets, name=frequency, heritrix=h)
        if not args.test:
            logger.debug("Starting job %s with %s seeds." %
                         (job.name, len(job.seeds)))
            job.start()
        else:
            logger.debug("Would start job %s with %s seeds." %
                         (job.name, len(job.seeds)))
            logger.debug("Seeds:")
            for surl in job.seeds:
                logger.debug("- %s" % surl)

    except Exception:
        # Best-effort: report the failure but do not propagate it.
        # KeyboardInterrupt / SystemExit are deliberately not caught here.
        logger.error("%s: %s" % (frequency, str(sys.exc_info())))
        logger.error("%s: %s" % (frequency, traceback.format_exc()))
Example #4
0
def _as_job_list(jobs):
    """Normalise a job listing to a list, whether it is one mapping or many."""
    if jobs is None:
        return []
    if isinstance(jobs, dict):
        return [jobs]
    return list(jobs)


def main(argv=None):
    """Command-line entry point for h3cc, the crawler control command.

    Parses the command line, connects to the Heritrix engine with ``hapy``
    and carries out the requested command against the selected job.

    Args:
        argv: Optional extra arguments. When given they are appended to
            ``sys.argv`` before parsing; when omitted ``sys.argv`` is used
            unchanged.

    Returns:
        0 on completion (including an unrecognised command, which is only
        logged) or on keyboard interrupt; 2 if an exception was raised.
    """

    if argv is None:
        argv = sys.argv
    else:
        sys.argv.extend(argv)

    program_name = os.path.basename(sys.argv[0])
    program_version = "v%s" % __version__
    program_build_date = str(__updated__)
    program_version_message = '%%(prog)s %s (%s)' % (program_version,
                                                     program_build_date)
    program_shortdesc = __import__('__main__').__doc__
    program_license = '''%s

  Created by Andrew Jackson on %s.
  Copyright 2016 The British Library.

  Licensed under the Apache License 2.0
  http://www.apache.org/licenses/LICENSE-2.0

  Distributed on an "AS IS" basis without warranties
  or conditions of any kind, either express or implied.

USAGE
''' % (program_shortdesc, str(__date__))

    try:
        # Setup argument parser
        parser = ArgumentParser(description=program_license,
                                formatter_class=RawDescriptionHelpFormatter)
        parser.add_argument("-v",
                            "--verbose",
                            dest="verbose",
                            action="count",
                            help="set verbosity level [default: %(default)s]",
                            default=0)
        parser.add_argument('-V',
                            '--version',
                            action='version',
                            version=program_version_message)
        parser.add_argument(
            '-j',
            '--job',
            dest='job',
            default='frequent',
            help="Name of job to operate upon. [default: %(default)s]")
        parser.add_argument(
            '-H',
            '--host',
            dest='host',
            default='localhost',
            help="Name of host to connect to. [default: %(default)s]")
        parser.add_argument(
            '-P',
            '--port',
            dest='port',
            default='8443',
            help="Secure port to connect to. [default: %(default)s]")
        parser.add_argument(
            '-u',
            '--user',
            dest='user',
            type=str,
            default="heritrix",
            help="H3 user to login with [default: %(default)s]")
        parser.add_argument('-p',
                            '--password',
                            dest='password',
                            type=str,
                            default="heritrix",
                            help="H3 user password [default: %(default)s]")
        # The short and long flags must be separate arguments: without the
        # comma Python joins the adjacent literals into one bogus flag.
        parser.add_argument(
            '-q',
            '--query-url',
            dest='query_url',
            type=str,
            default='http://www.bbc.co.uk/news',
            help="URL to use for queries [default: %(default)s]")
        parser.add_argument(
            '-l',
            '--query-limit',
            dest='query_limit',
            type=int,
            default=10,
            help="Maximum number of results to return from queries "
                 "[default: %(default)s]")
        parser.add_argument(dest="command",
                            help="Command to carry out. One of: " +
                            ", ".join(H3_SCRIPTS_JOB + H3_SCRIPTS_JOB_URL) +
                            ". [default: %(default)s]",
                            metavar="command")

        # Process arguments (argparse reads sys.argv[1:])
        args = parser.parse_args()

        # Up the logging
        verbose = args.verbose
        if verbose > 0:
            logger.setLevel(logging.DEBUG)

        # talk to h3:
        ha = hapy.Hapy("https://%s:%s" % (args.host, args.port),
                       username=args.user,
                       password=args.password)
        job = args.job

        # Commands:
        command = args.command
        if command == "status":
            print(ha.get_info())
        elif command == "list-jobs":
            # The listing is handled both as a single mapping and as a list.
            for j in _as_job_list(ha.get_info()['engine']['jobs']['value']):
                print(j['key'])
        elif command == "job-summary":
            # The listing is handled both as a single mapping and as a list.
            for j in _as_job_list(ha.get_info()['engine']['jobs']['value']):
                if job == j['key']:
                    print(j)
        elif command == "job-build":
            ha.build_job(job)
        elif command == "job-launch":
            ha.launch_job(job)
        elif command == "job-resume":
            ha.launch_from_latest_checkpoint(job)
        elif command == "job-pause":
            ha.pause_job(job)
        elif command == "job-unpause":
            ha.unpause_job(job)
        elif command == "job-checkpoint":
            ha.checkpoint_job(job)
        elif command == "job-terminate":
            ha.terminate_job(job)
        elif command == "job-teardown":
            ha.teardown_job(job)
        elif command == "job-status":
            print(ha.get_job_info(job)['job']['statusDescription'])
        elif command == "job-info":
            print(ha.get_job_info(job))
        elif command == "job-info-json":
            print(json.dumps(ha.get_job_info(job), indent=4))
        elif command == "job-cxml":
            print(ha.get_job_configuration(job))
        elif command in H3_SCRIPTS_JOB:
            # Job-level Groovy script: the template takes no parameters.
            template = env.get_template('%s.groovy' % command)
            r = ha.execute_script(engine="groovy",
                                  script=template.render(),
                                  name=job)
            print(r[0])
        elif command in H3_SCRIPTS_JOB_URL:
            # URL-level Groovy script: the template is rendered with the
            # query URL and result limit from the command line.
            template = env.get_template('%s.groovy' % command)
            r = ha.execute_script(engine="groovy",
                                  script=template.render({
                                      "url": args.query_url,
                                      "limit": args.query_limit
                                  }),
                                  name=job)
            print(r[0])
        else:
            logger.error("Can't understand command '%s'" % command)

        return 0
    except KeyboardInterrupt:
        ### handle keyboard interrupt ###
        return 0
    except Exception as e:
        indent = len(program_name) * " "
        sys.stderr.write(program_name + ": " + repr(e) + "\n")
        sys.stderr.write(indent + "  for help use --help")
        logger.exception(e)
        return 2
Example #5
0
    while action not in info['job']['availableActions']['value']:
        time.sleep(1)
        info = h.get_job_info(job_name)


# main

try:
    name = sys.argv[1]
except IndexError:
    print "Usage: run_job.py job-name"
    sys.exit(1)

try:
    h = hapy.Hapy('https://localhost:8443',
                  username='******',
                  password='******')
    state = get_state(h, name)
    if cmp(state, "running"):
        print "error: job is still running"
        sys.exit(1)
    elif not cmp(state, "finished"):
        print "error: job is in unexpected state: %s" % state
        sys.exit(1)

    # job should be finished by now
    h.teardown_job(name)
    wait_for_state(h, name, "unbuilt")

    # now build it
    h.build_job(name)
Example #6
0
def test_auth_no_username():
    """Expect ``auth`` to be None when only a password is supplied."""
    client = hapy.Hapy(BASE_URL, password='******')
    assert_is_none(client.auth)
Example #7
0
def test_auth_no_password():
    """Expect ``auth`` to be None when only a username is supplied."""
    client = hapy.Hapy(BASE_URL, username='******')
    assert_is_none(client.auth)
Example #8
0
def test_auth_nothing():
    """Expect ``auth`` to be None when no credentials are supplied."""
    client = hapy.Hapy(BASE_URL)
    assert_is_none(client.auth)
Example #9
0
def test_url_normalise():
    """Expect the same ``base_url`` with or without a trailing slash."""
    without_slash = hapy.Hapy('http://localhost:8443')
    with_slash = hapy.Hapy('http://localhost:8443/')
    assert_equals(without_slash.base_url, with_slash.base_url)
Example #10
0
def setup():
    """Bind the module-level name ``h`` to a Hapy client for BASE_URL."""
    global h
    h = hapy.Hapy(BASE_URL)