Esempio n. 1
0
def stop_running_job(frequency, heritrix):
    """Halt the running crawl for ``frequency`` and announce that it ended.

    The launch id is read from ``heritrix`` before anything else, then the
    job found under ``settings.HERITRIX_JOBS/<frequency>`` is stopped. A
    ``"<frequency>/<launchid>"`` message is published to the SIP queue and
    then to the QA queue, after which the action files for the frequency are
    removed. When ``settings.SLACK`` is truthy, statistics generated from the
    launch's ``crawl.log*`` files are sent to Slack as well.
    """
    launch_id = heritrix.launchid(frequency)
    payload = "%s/%s" % (frequency, launch_id)

    job_dir = "%s/%s" % (settings.HERITRIX_JOBS, frequency)
    W3actJob.from_directory(job_dir, heritrix=heritrix).stop()

    # Announce the finished launch: SIP first, then QA.
    logger.info("Sending SIP message: %s" % payload)
    send_message(
        settings.QUEUE_HOST,
        settings.SIP_QUEUE_NAME,
        settings.SIP_QUEUE_KEY,
        payload,
    )
    logger.info("Sending QA message: %s" % payload)
    send_message(
        settings.QUEUE_HOST,
        settings.QA_QUEUE_NAME,
        settings.QA_QUEUE_KEY,
        payload,
    )

    remove_action_files(frequency)

    if not settings.SLACK:
        return

    # Slack reporting is optional and only runs after cleanup has happened.
    log_pattern = "%s/%s/%s/crawl.log*" % (
        settings.HERITRIX_LOGS,
        frequency,
        launch_id,
    )
    crawl_logs = glob(log_pattern)
    send_slack_messages(generate_log_stats(crawl_logs), frequency)
Esempio n. 2
0
def check_watched_targets(jobname, heritrix):
    """Send a single message if the current launch has any Watched Target.

    The launch id of ``jobname`` is obtained from ``heritrix`` and used to
    locate ``w3act-info.json`` under ``settings.HERITRIX_JOBS``. If that file
    does not exist the function returns without doing anything. Otherwise the
    file is parsed as JSON and iterated; if at least one entry has a truthy
    ``"watched"`` value, one ``"<jobname>/<launchid>"`` message is published
    to the Watched queue.

    Raises:
        KeyError: if an entry examined before a watched one is found lacks
            the ``"watched"`` key.
        ValueError: if the file does not contain valid JSON.
    """
    timestamp = heritrix.launchid(jobname)
    # Build the path once so the existence check and the open() cannot drift.
    info_path = "%s/%s/%s/w3act-info.json" % (
        settings.HERITRIX_JOBS,
        jobname,
        timestamp,
    )
    if not os.path.exists(info_path):
        return

    with open(info_path, "rb") as info_file:
        targets = json.loads(info_file.read())

    # The payload identifies the launch, not an individual target, so one
    # message is enough no matter how many targets are watched; sending one
    # per target would only enqueue identical duplicates.
    if not any(target["watched"] for target in targets):
        return

    logger.info("Found a Watched Target in %s/%s.", jobname, timestamp)
    send_message(
        settings.QUEUE_HOST,
        settings.WATCHED_QUEUE_NAME,
        settings.WATCHED_QUEUE_KEY,
        "%s/%s" % (jobname, timestamp),
    )