def stop_running_job(frequency, heritrix):
    """Stop the job for ``frequency`` and announce its launch to the queues.

    The launch id is read from ``heritrix`` *before* the job is stopped, the
    job is stopped, and a ``"<frequency>/<launchid>"`` message is then sent to
    the SIP queue followed by the QA queue.  Afterwards
    ``remove_action_files(frequency)`` is called and, when ``settings.SLACK``
    is truthy, statistics built from the launch's ``crawl.log*`` files are
    passed to ``send_slack_messages``.

    Args:
        frequency: Job name; also used as the sub-directory name under
            ``settings.HERITRIX_JOBS`` and ``settings.HERITRIX_LOGS``.
        heritrix: Object exposing ``launchid(name)``; it is also handed to
            ``W3actJob.from_directory``.
    """
    launch_id = heritrix.launchid(frequency)
    payload = "%s/%s" % (frequency, launch_id)

    job_dir = "%s/%s" % (settings.HERITRIX_JOBS, frequency)
    W3actJob.from_directory(job_dir, heritrix=heritrix).stop()

    # (log template, settings attribute for queue name, ... for routing key).
    # The attributes are looked up lazily inside the loop so each setting is
    # only read at the moment its message is about to be sent.
    notifications = (
        ("Sending SIP message: %s", "SIP_QUEUE_NAME", "SIP_QUEUE_KEY"),
        ("Sending QA message: %s", "QA_QUEUE_NAME", "QA_QUEUE_KEY"),
    )
    for template, name_attr, key_attr in notifications:
        logger.info(template % payload)
        send_message(
            settings.QUEUE_HOST,
            getattr(settings, name_attr),
            getattr(settings, key_attr),
            payload,
        )

    remove_action_files(frequency)

    if not settings.SLACK:
        return
    log_pattern = "%s/%s/%s/crawl.log*" % (
        settings.HERITRIX_LOGS,
        frequency,
        launch_id,
    )
    crawl_stats = generate_log_stats(glob(log_pattern))
    send_slack_messages(crawl_stats, frequency)
def check_watched_targets(jobname, heritrix):
    """Send a message for every watched entry in the launch's w3act-info.json.

    The file is looked up at
    ``<settings.HERITRIX_JOBS>/<jobname>/<launchid>/w3act-info.json``.  If it
    does not exist the function returns without doing anything.  Otherwise
    the file is parsed as JSON and, for each entry whose ``"watched"`` value
    is truthy, a ``"<jobname>/<launchid>"`` message is sent to the watched
    queue (one message per such entry).

    Args:
        jobname: Job name; also the sub-directory under
            ``settings.HERITRIX_JOBS``.
        heritrix: Object exposing ``launchid(name)``.

    Raises:
        KeyError: If an entry has no ``"watched"`` key.
    """
    launch_id = heritrix.launchid(jobname)
    info_path = "%s/%s/%s/w3act-info.json" % (
        settings.HERITRIX_JOBS,
        jobname,
        launch_id,
    )
    if not os.path.exists(info_path):
        return

    with open(info_path, "rb") as handle:
        raw = handle.read()

    for target in json.loads(raw):
        if not target["watched"]:
            continue
        logger.info("Found a Watched Target in %s/%s." % (jobname, launch_id))
        send_message(
            settings.QUEUE_HOST,
            settings.WATCHED_QUEUE_NAME,
            settings.WATCHED_QUEUE_KEY,
            "%s/%s" % (jobname, launch_id),
        )