def list_job(request):
    """Render the Chronos job list, annotating every job before display.

    Each job returned by the Chronos client gets two extra keys:

    * ``last`` -- "error", "success" or "-" depending on which of
      ``lastError`` / ``lastSuccess`` is set (the more recent one wins when
      both are set).
    * ``stat`` -- the ``histogram`` entry of the job's statistics.
    """
    endpoint = '{}:{}'.format(settings.CHRONOS['host'], settings.CHRONOS['port'])
    cclient = chronos.connect(endpoint)
    jobs = cclient.list()

    for job in jobs:
        has_success = job['lastSuccess'] != ""
        has_error = job['lastError'] != ""

        if has_success and has_error:
            # Both timestamps are present: compare them as datetimes.
            success_at = dateutil.parser.parse(job['lastSuccess'])
            error_at = dateutil.parser.parse(job['lastError'])
            job['last'] = "error" if error_at > success_at else "success"
        elif has_error:
            job['last'] = "error"
        elif has_success:
            job['last'] = "success"
        else:
            # Neither timestamp is set.
            job['last'] = "-"

        job['stat'] = cclient.job_stat(job['name'])['histogram']

    context = {'jobs': jobs, 'refresh': 3000}
    return render(request, 'chronos_mgmt/list_job.html', context)
def get_stars_client():
    """Return the shared Stars client, creating it on first use.

    The client is cached as an attribute of this function, so only one
    instance is ever built. Configuration is read from the environment:

    * ``CHRONOS_URL`` (required): Chronos endpoint(s), comma separated,
      optionally prefixed with ``<scheme>://``. Without a scheme prefix,
      ``https`` is used.
    * ``DATACOMMONS_SERVICES_ENDPOINT`` (optional): comma-separated service
      endpoints.

    Raises:
        RuntimeError: if ``CHRONOS_URL`` is unset or empty.
    """
    # store client in static function var, so there's only one instance
    if not hasattr(get_stars_client, "stars_client"):
        services_endpoint = os.getenv("DATACOMMONS_SERVICES_ENDPOINT")
        chronos_url = os.getenv("CHRONOS_URL")
        if not chronos_url:
            raise RuntimeError("The datacommons module requires CHRONOS_URL to be set")

        # Split on the literal "://" instead of relying on urlparse().scheme:
        # on Python 3.9+ urlparse("host:4400") reports "host" as the scheme,
        # which would chop the front off a scheme-less "host:port" value.
        scheme, separator, remainder = chronos_url.partition("://")
        if separator and scheme:
            chronos_proto = scheme.lower()
            chronos_endpoint = remainder
        else:
            chronos_proto = "https"
            chronos_endpoint = chronos_url

        get_stars_client.stars_client = Stars(
            services_endpoints=services_endpoint.split(",") if services_endpoint else None,
            scheduler_endpoints=chronos_endpoint.split(",") if chronos_endpoint else None)

        # override chronos client to use the proto taken from CHRONOS_URL
        # TODO update stars to allow proto specification
        # https://github.com/stevencox/cwltool/issues/14
        # NOTE(review): the endpoint is passed here as one (possibly
        # comma-separated) string, while Stars receives the split list --
        # confirm chronos.connect handles multiple endpoints in this form.
        get_stars_client.stars_client.scheduler.client = chronos.connect(
            chronos_endpoint, proto=chronos_proto)
    return get_stars_client.stars_client
def list_job(request):
    """Show all Chronos jobs together with their last outcome and statistics.

    For every job the view stores the outcome of the latest run under
    ``job["last"]`` ("error", "success" or "-") and the job's statistics
    histogram under ``job["stat"]``, then renders the list template.
    """
    client = chronos.connect(
        "{}:{}".format(settings.CHRONOS["host"], settings.CHRONOS["port"])
    )
    jobs = client.list()

    for job in jobs:
        success_raw, error_raw = job["lastSuccess"], job["lastError"]

        if success_raw == "" and error_raw == "":
            outcome = "-"
        elif success_raw == "":
            outcome = "error"
        elif error_raw == "":
            outcome = "success"
        else:
            # Both runs are recorded; the later timestamp decides.
            succeeded_at = dateutil.parser.parse(job["lastSuccess"])
            failed_at = dateutil.parser.parse(job["lastError"])
            if failed_at > succeeded_at:
                outcome = "error"
            else:
                outcome = "success"

        job["last"] = outcome
        job["stat"] = client.job_stat(job["name"])["histogram"]

    data = {"jobs": jobs, "refresh": 3000}
    return render(request, "chronos_mgmt/list_job.html", data)
def get_chronos_client(config):
    """Create a chronos client from the URLs and credentials in ``config``.

    Only the network location (``netloc``) of each configured URL is passed
    on to ``chronos.connect``.
    """
    chronos_hosts = config.get_url()

    netlocs = []
    for url in chronos_hosts:
        netlocs.append(urlparse.urlsplit(url).netloc)

    log.info("Attempting to connect to Chronos servers: %s" % chronos_hosts)

    username = config.get_username()
    password = config.get_password()
    return chronos.connect(servers=netlocs, username=username, password=password)
def main():
    """Check the last-run status of Chronos jobs and exit with a status code.

    Command-line options select the Chronos instance (``--hostname``),
    restrict the jobs considered (``--prefix`` / ``--exclude``, repeatable)
    and set the failure-count thresholds (``--warn`` / ``--crit``).

    Disabled jobs are skipped. A job counts as failed when its ``lastError``
    value compares greater than ``lastSuccess``, as succeeded when it has a
    non-empty ``lastSuccess`` otherwise, and as unknown when neither holds.

    Exit status: 0 with an "OK" line, 1 with "WARNING", 2 with "CRITICAL",
    3 with "UNKNOWN".
    """
    parser = argparse.ArgumentParser(description="Monitor the status of Chronos Jobs")
    parser.add_argument("--hostname", metavar="<host:port>", required=True,
                        help="hostname and port of the Chronos instance")
    parser.add_argument("--prefix", metavar="job-prefix", required=False, action="append",
                        help="if set, only check jobs matching this prefix")
    parser.add_argument("--exclude", metavar="job-prefix", required=False, action="append",
                        help="if set, exclude jobs matching this prefix")
    parser.add_argument("--warn", metavar="#", default=1,
                        help="warn if at least this number of jobs are currently failed")
    parser.add_argument("--crit", metavar="#", default=1,
                        help="critical if at least this number of jobs are currently failed")
    args = parser.parse_args()

    fails = []
    ok = []
    unknown = []

    try:
        c = chronos.connect(args.hostname)
        cjobs = c.list()
    except Exception as exc:
        # A failed query must be reported as UNKNOWN; an uncaught traceback
        # would end the process with status 1, the WARNING status.
        print("UNKNOWN: error querying chronos: %s" % exc)
        sys.exit(3)
    if not isinstance(cjobs, list):
        print("UNKNOWN: error querying chronos")
        sys.exit(3)

    for job in cjobs:
        if job['disabled']:
            continue
        if isinstance(args.prefix, list) and not match_prefix(args.prefix, job['name']):
            continue
        if isinstance(args.exclude, list) and match_prefix(args.exclude, job['name']):
            continue

        # Names are encoded to ASCII before being collected, as in the
        # original script (affects how the list is rendered in the output).
        name = job['name'].encode('ascii')
        if job['lastError'] > job['lastSuccess']:
            fails.append(name)
        elif job['lastSuccess']:
            ok.append(name)
        else:
            unknown.append(name)

    if unknown:
        umsg = "(%d waiting for execution or with no data)" % len(unknown)
    else:
        umsg = ''

    # print(...) with a single argument behaves the same on Python 2 and 3.
    if not fails:
        print("OK: %d jobs succeeded on last run %s" % (len(ok), umsg))
        sys.exit(0)

    failed_names = str(fails).strip('[]')
    if len(fails) >= int(args.crit):
        print("CRITICAL: %d failed jobs: %s %s" % (len(fails), failed_names, umsg))
        sys.exit(2)
    if len(fails) >= int(args.warn):
        print("WARNING: %d failed jobs: %s %s" % (len(fails), failed_names, umsg))
        sys.exit(1)

    # Some jobs failed but both thresholds are higher: say so explicitly
    # instead of ending without any output.
    print("OK: %d failed jobs (below thresholds): %s %s" % (len(fails), failed_names, umsg))
    sys.exit(0)
def dashboard(request):
    """Render the dashboard with template, app, job and watcher totals.

    The totals come from the ``Template`` model, the Marathon app list, the
    Chronos job list and ``settings.WATCHER_THREADS``. Errors raised by the
    Marathon or Chronos clients propagate to the caller.
    """
    data = {}
    data['total_template'] = Template.objects.count()

    mc = MarathonClient('http://{}:{}'.format(settings.MARATHON['host'], settings.MARATHON['port']))
    data['total_app'] = len(mc.list_apps())

    cclient = chronos.connect('{}:{}'.format(settings.CHRONOS['host'], settings.CHRONOS['port']))
    # Query Chronos once; the job list was previously fetched twice and the
    # first result discarded.
    data['total_job'] = len(cclient.list())

    data['total_watcher'] = len(settings.WATCHER_THREADS)
    return render(request, 'dashboard/dashboard.html', data)
def working_chronos(context):
    """Attach a chronos client to ``context.client`` unless one is already set.

    The scheduler API version passed to ``chronos.connect`` is ``None`` when
    the configured ``chronos_version`` is listed in ``LEGACY_VERSIONS`` and
    ``'v1'`` otherwise.
    """
    if hasattr(context, 'client'):
        return

    chronos_servers = ['127.0.0.1:4400']
    chronos_version = context.config.userdata.get('chronos_version', DEFAULT_CHRONOS_VERSION)
    scheduler_api_version = None if chronos_version in LEGACY_VERSIONS else 'v1'
    context.client = chronos.connect(chronos_servers, scheduler_api_version=scheduler_api_version)
def __init__(self, app, nworkers, **kwargs):
    """Initialize the job runner and create its Chronos client.

    ``RUNNER_PARAM_SPEC`` is merged into the runner parameter spec carried
    in ``kwargs`` (an empty spec is created when none was supplied) before
    the base class initializer runs. The client uses ``http`` unless the
    ``insecure`` runner parameter is falsy, in which case it uses ``https``.
    """
    assert chronos, CHRONOS_IMPORT_MSG

    param_spec = kwargs.setdefault(self.RUNNER_PARAM_SPEC_KEY, {})
    param_spec.update(self.RUNNER_PARAM_SPEC)
    super().__init__(app, nworkers, **kwargs)

    if self.runner_params.get('insecure', True):
        protocol = 'http'
    else:
        protocol = 'https'

    self._chronos_client = chronos.connect(
        self.runner_params['chronos'],
        username=self.runner_params.get('username'),
        password=self.runner_params.get('password'),
        proto=protocol,
    )
def __init__(self, app, nworkers, **kwargs):
    """Initialize the job runner, its Chronos client and its threads.

    ``RUNNER_PARAM_SPEC`` is merged into the runner parameter spec carried
    in ``kwargs`` (an empty spec is created when none was supplied) before
    the base class initializer runs. After the client is created, the
    monitor thread and the worker threads are initialized.
    """
    assert chronos, CHRONOS_IMPORT_MSG

    param_spec = kwargs.setdefault(self.RUNNER_PARAM_SPEC_KEY, {})
    param_spec.update(self.RUNNER_PARAM_SPEC)
    super(ChronosJobRunner, self).__init__(app, nworkers, **kwargs)

    # 'insecure' defaults to True, i.e. plain http unless disabled.
    if self.runner_params.get('insecure', True):
        protocol = 'http'
    else:
        protocol = 'https'

    self._chronos_client = chronos.connect(
        self.runner_params['chronos'],
        username=self.runner_params.get('username'),
        password=self.runner_params.get('password'),
        proto=protocol,
    )
    self._init_monitor_thread()
    self._init_worker_threads()
def dashboard(request):
    """Render the dashboard with template, app, job and watcher totals.

    Marathon and Chronos are queried on a best-effort basis: if either
    lookup raises, its total falls back to an empty list so the page still
    renders.
    """
    data = {}
    data["total_template"] = Template.objects.count()

    try:
        mc = MarathonClient("http://{}:{}".format(settings.MARATHON["host"], settings.MARATHON["port"]))
        data["total_app"] = len(mc.list_apps())
    except Exception:
        # NOTE(review): the fallback is a list while the success path stores
        # an int; kept as-is because the template is not visible here.
        data["total_app"] = []

    try:
        cclient = chronos.connect("{}:{}".format(settings.CHRONOS["host"], settings.CHRONOS["port"]))
        # Query Chronos once; the job list was previously fetched twice and
        # the first result discarded.
        data["total_job"] = len(cclient.list())
    except Exception:
        data["total_job"] = []

    data["total_watcher"] = len(settings.WATCHER_THREADS)
    return render(request, "dashboard/dashboard.html", data)
def send_to_chronos(request):
    """Run or destroy a Chronos job on behalf of a POST request.

    Reads ``action`` ("run" or "destroy") and ``name`` from the POST data
    and forwards the call to Chronos.

    Returns:
        HttpResponse whose body is a JSON object with ``status`` ("success"
        or "error") and ``msg``. Every failure -- including a non-POST
        request or an unsupported action -- is reported in the body rather
        than raised.
    """
    import json  # local import so the block does not depend on file-level imports

    # Bound before the try so the error path can always report it; previously
    # a non-POST request ended in an UnboundLocalError.
    action = None
    try:
        if request.method != "POST":
            raise ValueError("only POST requests are supported")

        action = request.POST.get("action", None)
        job_name = request.POST.get("name", None)
        if action not in ("destroy", "run"):
            # An unknown action used to do nothing and still report success.
            raise ValueError("unsupported action")

        cclient = chronos.connect("{}:{}".format(settings.CHRONOS["host"], settings.CHRONOS["port"]))
        if action == "destroy":
            cclient.delete(job_name)
        else:
            cclient.run(job_name)
        payload = {"status": "success", "msg": "%s success" % action}
    except Exception as e:
        payload = {
            "status": "error",
            "msg": "%s fail: %s" % (html.escape(str(action)), html.escape(str(e))),
        }
    # json.dumps keeps the body valid JSON even when the action or the error
    # text contains quotes or backslashes.
    return HttpResponse(json.dumps(payload))
def send_to_chronos(request):
    """Run or destroy a Chronos job on behalf of a POST request.

    Reads ``action`` ("run" or "destroy") and ``name`` from the POST data
    and forwards the call to Chronos. Destroying a job requires the
    ``auth.can_init_job`` permission.

    Returns:
        HttpResponse whose body is a JSON object with ``status`` ("success"
        or "error") and ``msg``. Every failure -- including a non-POST
        request, an unsupported action or a missing permission -- is
        reported in the body rather than raised.
    """
    import json  # local import so the block does not depend on file-level imports

    # Bound before the try so the error path can always report it; previously
    # a non-POST request ended in an UnboundLocalError.
    action = None
    try:
        if request.method != 'POST':
            raise ValueError("only POST requests are supported")

        action = request.POST.get('action', None)
        job_name = request.POST.get('name', None)
        if action not in ('destroy', 'run'):
            # An unknown action used to do nothing and still report success.
            raise ValueError("unsupported action")

        cclient = chronos.connect('{}:{}'.format(settings.CHRONOS['host'], settings.CHRONOS['port']))
        if action == 'destroy':
            if not request.user.has_perm("auth.can_init_job"):
                raise PermissionDenied
            cclient.delete(job_name)
        else:
            cclient.run(job_name)
        payload = {"status": "success", "msg": "%s success" % action}
    except Exception as e:
        payload = {
            "status": "error",
            "msg": "%s fail: %s" % (html.escape(str(action)), html.escape(str(e))),
        }
    # json.dumps keeps the body valid JSON even when the action or the error
    # text contains quotes or backslashes.
    return HttpResponse(json.dumps(payload))
def dashboard(request):
    """Render the dashboard with template, app, job and watcher totals.

    Marathon and Chronos are queried on a best-effort basis: if either
    lookup raises, its total falls back to an empty list so the page still
    renders.
    """
    data = {}
    data['total_template'] = Template.objects.count()

    try:
        mc = MarathonClient('http://{}:{}'.format(settings.MARATHON['host'], settings.MARATHON['port']))
        data['total_app'] = len(mc.list_apps())
    except Exception:
        # NOTE(review): the fallback is a list while the success path stores
        # an int; kept as-is because the template is not visible here.
        data['total_app'] = []

    try:
        cclient = chronos.connect('{}:{}'.format(settings.CHRONOS['host'], settings.CHRONOS['port']))
        # Query Chronos once; the job list was previously fetched twice and
        # the first result discarded.
        data['total_job'] = len(cclient.list())
    except Exception:
        data['total_job'] = []

    data['total_watcher'] = len(settings.WATCHER_THREADS)
    return render(request, 'dashboard/dashboard.html', data)
def get_chronos_client(config, cached=False):
    """Create a chronos client from the URLs and credentials in ``config``.

    With ``cached=True`` a ``CachingChronosClient`` is built instead of the
    object returned by ``chronos.connect``; both receive the same keyword
    arguments. Only the ``netloc`` part of each configured URL is used.
    """
    chronos_hosts = config.get_url()

    chronos_hostnames = []
    for hostname in chronos_hosts:
        chronos_hostnames.append(urlsplit(hostname).netloc)

    log.info("Attempting to connect to Chronos servers: %s" % chronos_hosts)

    make_client = CachingChronosClient if cached else chronos.connect
    connection_args = {
        "servers": chronos_hostnames,
        "username": config.get_username(),
        "password": config.get_password(),
        "scheduler_api_version": None,
    }
    return make_client(**connection_args)
def new_job(request):
    """Show the new-job form and, on POST, submit the rendered job to Chronos.

    On POST the form fields are collected into a parameter dict, the chosen
    template's content is filled in with ``%``-formatting, and the resulting
    JSON document is added to Chronos. The outcome is stored under
    ``data['result']`` ("Success" or the error text). The list of Chronos
    templates (with their params) is always added for rendering.
    """
    data = {}
    if request.method == 'POST':
        data['msg'] = "Post"
        post_params = {}
        for key in request.POST:
            if not key.startswith("filehidden"):
                post_params[key] = request.POST[key]
                continue

            # "filehidden" is 10 characters; the slice also drops the next
            # one -- presumably a separator, verify against the form markup.
            fkey = key[11:]
            post_file = request.FILES.get(fkey, None)
            if post_file:
                # Join the raw chunks before decoding: decoding chunk by
                # chunk fails when a multi-byte UTF-8 character straddles a
                # chunk boundary.
                file_content = b"".join(post_file.chunks()).decode("utf8")
                post_params[fkey] = convert(file_content)
            else:
                post_params[fkey] = request.POST[key]

        template = Template.objects.get(pk=post_params['template_id'])
        content = template.content % post_params
        data['content'] = content

        cclient = chronos.connect('{}:{}'.format(settings.CHRONOS['host'], settings.CHRONOS['port']))
        try:
            # Parse once (a leftover debug print used to parse it twice).
            cclient.add(json.loads(content))
            data['result'] = "Success"
        except Exception as e:
            data['result'] = str(e)

    templates = Template.objects.filter(type="chronos").order_by('name').all()
    for template in templates:
        template.params = template.param_set.order_by('id')
    data['templates'] = templates
    return render(request, 'chronos_mgmt/new_job.html', data)
def new_job(request):
    """Show the new-job form and, on POST, submit the rendered job to Chronos.

    On POST the form fields are collected into a parameter dict, the chosen
    template's content is filled in with ``%``-formatting, and the resulting
    JSON document is added to Chronos. The outcome is stored under
    ``data['result']`` ("Success" or the error text). The list of Chronos
    templates (with their params) is always added for rendering.
    """
    data = {}
    if request.method == 'POST':
        data['msg'] = "Post"
        post_params = {}
        for key in request.POST:
            if not key.startswith("filehidden"):
                post_params[key] = request.POST[key]
                continue

            # "filehidden" is 10 characters; the slice also drops the next
            # one -- presumably a separator, verify against the form markup.
            fkey = key[11:]
            post_file = request.FILES.get(fkey, None)
            if post_file:
                # Join the raw chunks before decoding: decoding chunk by
                # chunk fails when a multi-byte UTF-8 character straddles a
                # chunk boundary.
                file_content = b"".join(post_file.chunks()).decode("utf8")
                post_params[fkey] = convert(file_content)
            else:
                post_params[fkey] = request.POST[key]

        template = Template.objects.get(pk=post_params['template_id'])
        content = template.content % post_params
        data['content'] = content

        cclient = chronos.connect('{}:{}'.format(settings.CHRONOS['host'], settings.CHRONOS['port']))
        try:
            # Parse once (a leftover debug print used to parse it twice).
            cclient.add(json.loads(content))
            data['result'] = "Success"
        except Exception as e:
            data['result'] = str(e)

    templates = Template.objects.filter(type="chronos").order_by('name').all()
    for template in templates:
        template.params = template.param_set.order_by('id')
    data['templates'] = templates
    return render(request, 'chronos_mgmt/new_job.html', data)
def send_to_chronos(request):
    """Run or destroy a Chronos job on behalf of a POST request.

    Reads ``action`` ("run" or "destroy") and ``name`` from the POST data
    and forwards the call to Chronos. Destroying a job requires the
    ``auth.can_init_job`` permission.

    Returns:
        HttpResponse whose body is a JSON object with ``status`` ("success"
        or "error") and ``msg``. Every failure -- including a non-POST
        request, an unsupported action or a missing permission -- is
        reported in the body rather than raised.
    """
    import json  # local import so the block does not depend on file-level imports

    # Bound before the try so the error path can always report it; previously
    # a non-POST request ended in an UnboundLocalError.
    action = None
    try:
        if request.method != 'POST':
            raise ValueError("only POST requests are supported")

        action = request.POST.get('action', None)
        job_name = request.POST.get('name', None)
        if action not in ('destroy', 'run'):
            # An unknown action used to do nothing and still report success.
            raise ValueError("unsupported action")

        cclient = chronos.connect('{}:{}'.format(settings.CHRONOS['host'], settings.CHRONOS['port']))
        if action == 'destroy':
            if not request.user.has_perm("auth.can_init_job"):
                raise PermissionDenied
            cclient.delete(job_name)
        else:
            cclient.run(job_name)
        payload = {"status": "success", "msg": "%s success" % action}
    except Exception as e:
        payload = {
            "status": "error",
            "msg": "%s fail: %s" % (html.escape(str(action)), html.escape(str(e))),
        }
    # json.dumps keeps the body valid JSON even when the action or the error
    # text contains quotes or backslashes.
    return HttpResponse(json.dumps(payload))
'-t', '--template', 'template_names', envvar="{0}_TEMPLATES".format(CONTEXT_SETTINGS['auto_envvar_prefix']), type=str, required=True, help="Path of the template to use for deployment.", multiple=True, ) chronos_client = click.option( '--chronos_url', 'chronos_client', envvar="{0}_CHRONOS_URL".format(CONTEXT_SETTINGS['auto_envvar_prefix']), required=True, help='URL of the Chronos endpoint to use', callback=lambda c, p, v: chronos.connect([v]), ) job_name = click.option( '-j', '--job-name', type=str, help='Restrict command to specific job.', ) marathon_lb_url = click.option( '--marathon_lb_url', 'marathon_lb_url', envvar="{0}_MARATHON_LB_URL".format(CONTEXT_SETTINGS['auto_envvar_prefix']), required=True, help="url of marathon lb",
def __init__(self, endpoints, proto="http"):
    """Connect to Chronos at ``endpoints`` and keep the client on ``self.client``.

    NOTE(review): ``proto`` is accepted but never used -- the connection is
    always made with ``"https"``. Honouring the argument would switch
    callers that rely on the default from https to http, so confirm the
    intended behaviour before changing it.
    """
    chronos_client = chronos.connect(endpoints, proto="https")
    self.client = chronos_client
def setup_chronos_client():
    """Return a chronos client for the 'chronos' service connection string.

    The client is created with ``scheduler_api_version=None``.
    """
    chronos_address = get_service_connection_string('chronos')
    client = chronos.connect(chronos_address, scheduler_api_version=None)
    return client
def working_chronos(context):
    """Attach a chronos client to ``context.client`` unless one is already set.

    The connection string comes from ``get_chronos_connection_string()``.
    """
    if hasattr(context, 'client'):
        return
    context.client = chronos.connect(get_chronos_connection_string())
def setup_chronos_client():
    """Return a chronos client for the 'chronos' service connection string."""
    chronos_address = get_service_connection_string('chronos')
    client = chronos.connect(chronos_address)
    return client
def main():
    """Sync Chronos jobs from local .json files, or list the jobs on Chronos.

    ``--list`` prints the jobs known to Chronos as indented JSON and exits.
    ``--sync DIR`` reads every job file found under ``DIR``: jobs missing on
    Chronos are added, jobs that ``check_update`` reports as changed are
    updated, and the rest are reported as up to date. With ``-n`` nothing is
    pushed. Failed pushes are retried for up to 10 rounds; jobs that still
    fail are printed at the end.

    Raises:
        Exception: if the ``--sync`` path is not a directory.
    """
    parser = argparse.ArgumentParser(
        description="Tool for syncing Chronos jobs from local .json files")
    parser.add_argument("--hostname", metavar="<host:port>", required=True,
                        help="hostname and port of the Chronos instance")
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        "--sync", metavar="/path/to/dir",
        help="path to a directory containing json files describing chronos jobs. "
             "All sub-directories will be searched for files ending in .json")
    group.add_argument("--list", action="store_true", help="list jobs on chronos")
    parser.add_argument("-n", action="store_true", default=False,
                        help="dry-run, don't actually push anything to chronos")
    args = parser.parse_args()

    c = chronos.connect(args.hostname)
    cjobs = c.list()

    if args.list:
        # cjobs isn't json but this still gets us the pretty
        print(json.dumps(cjobs, sort_keys=True, indent=4))
        sys.exit(0)

    if args.sync:
        jobs = {}
        retry = {'update': [], 'add': []}
        for job in cjobs:
            jobs[job["name"]] = job

        if not os.path.isdir(args.sync):
            raise Exception("%s must be a directory" % args.sync)

        for job_file in find_json_files(args.sync):
            job = read_job_file(job_file)
            if not job:
                print("Skipping %s" % job_file)
                continue

            if job['name'] not in jobs:
                print("Adding job %s from file %s" % (job['name'], job_file))
                if not args.n:
                    try:
                        c.add(job)
                    except Exception:
                        # Best effort: queue the job for the retry rounds.
                        # (A bare except would also trap Ctrl-C.)
                        retry['add'].append(job)
            elif check_update(jobs, job):
                print("Updating job %s from file %s" % (job['name'], job_file))
                if not args.n:
                    try:
                        c.update(job)
                    except Exception:
                        retry['update'].append(job)
            else:
                print("Job %s defined in %s is up-to-date on Chronos" % (job['name'], job_file))

        # Each round retries at most one pending update and one pending add.
        attempt = 0
        while (retry['update'] or retry['add']) and attempt < 10:
            attempt += 1
            if retry['update']:
                job = retry['update'].pop(0)
                try:
                    print("Retry %d for job %s" % (attempt, job['name']))
                    c.update(job)
                except Exception:
                    retry['update'].append(job)
            if retry['add']:
                job = retry['add'].pop(0)
                try:
                    print("Retry %d for job %s" % (attempt, job['name']))
                    c.add(job)
                except Exception:
                    retry['add'].append(job)

        if retry['update'] or retry['add']:
            # Jobs are dicts, which Python 3 cannot order; sort them by name
            # so the report itself cannot raise TypeError.
            failed = sorted(retry['update'] + retry['add'], key=lambda j: j['name'])
            print("Failed Jobs: %s" % failed)
def get_chronos_client(endpoint, proto):
    """Return a chronos client connected to ``endpoint`` using ``proto``."""
    return chronos.connect(endpoint, proto=proto)
def main():
    """Check the last-run status of Chronos jobs and exit with a status code.

    Command-line options select the Chronos instance (``--hostname``),
    restrict the jobs considered (``--prefix`` / ``--exclude``, repeatable)
    and set the failure-count thresholds (``--warn`` / ``--crit``).

    Disabled jobs are skipped. A job counts as failed when its ``lastError``
    value compares greater than ``lastSuccess``, as succeeded when it has a
    non-empty ``lastSuccess`` otherwise, and as unknown when neither holds.

    Exit status: 0 with an "OK" line, 1 with "WARNING", 2 with "CRITICAL",
    3 with "UNKNOWN".
    """
    parser = argparse.ArgumentParser(
        description="Monitor the status of Chronos Jobs")
    parser.add_argument("--hostname", metavar="<host:port>", required=True,
                        help="hostname and port of the Chronos instance")
    parser.add_argument("--prefix", metavar="job-prefix", required=False,
                        action="append",
                        help="if set, only check jobs matching this prefix")
    parser.add_argument("--exclude", metavar="job-prefix", required=False,
                        action="append",
                        help="if set, exclude jobs matching this prefix")
    parser.add_argument(
        "--warn", metavar="#", default=1,
        help="warn if at least this number of jobs are currently failed")
    parser.add_argument(
        "--crit", metavar="#", default=1,
        help="critical if at least this number of jobs are currently failed")
    args = parser.parse_args()

    fails = []
    ok = []
    unknown = []

    try:
        c = chronos.connect(args.hostname)
        cjobs = c.list()
    except Exception as exc:
        # A failed query must be reported as UNKNOWN; an uncaught traceback
        # would end the process with status 1, the WARNING status.
        print("UNKNOWN: error querying chronos: %s" % exc)
        sys.exit(3)
    if not isinstance(cjobs, list):
        print("UNKNOWN: error querying chronos")
        sys.exit(3)

    for job in cjobs:
        if job['disabled']:
            continue
        if isinstance(args.prefix, list) and not match_prefix(args.prefix, job['name']):
            continue
        if isinstance(args.exclude, list) and match_prefix(args.exclude, job['name']):
            continue

        # Names are encoded to ASCII before being collected, as in the
        # original script (affects how the list is rendered in the output).
        name = job['name'].encode('ascii')
        if job['lastError'] > job['lastSuccess']:
            fails.append(name)
        elif job['lastSuccess']:
            ok.append(name)
        else:
            unknown.append(name)

    if unknown:
        umsg = "(%d waiting for execution or with no data)" % len(unknown)
    else:
        umsg = ''

    # print(...) with a single argument behaves the same on Python 2 and 3.
    if not fails:
        print("OK: %d jobs succeeded on last run %s" % (len(ok), umsg))
        sys.exit(0)

    failed_names = str(fails).strip('[]')
    if len(fails) >= int(args.crit):
        print("CRITICAL: %d failed jobs: %s %s" % (len(fails), failed_names, umsg))
        sys.exit(2)
    if len(fails) >= int(args.warn):
        print("WARNING: %d failed jobs: %s %s" % (len(fails), failed_names, umsg))
        sys.exit(1)

    # Some jobs failed but both thresholds are higher: say so explicitly
    # instead of ending without any output.
    print("OK: %d failed jobs (below thresholds): %s %s" % (len(fails), failed_names, umsg))
    sys.exit(0)
def main():
    """Sync Chronos jobs from local .json files, or list the jobs on Chronos.

    ``--list`` prints the jobs known to Chronos as indented JSON and exits.
    ``--sync DIR`` reads every job file found under ``DIR``: jobs missing on
    Chronos are added, jobs that ``check_update`` reports as changed are
    updated, and the rest are reported as up to date. With ``-n`` nothing is
    pushed. Failed pushes are retried for up to 10 rounds; jobs that still
    fail are printed at the end.

    Raises:
        Exception: if the ``--sync`` path is not a directory.
    """
    parser = argparse.ArgumentParser(description="Tool for syncing Chronos jobs from local .json files")
    parser.add_argument("--hostname", metavar="<host:port>", required=True,
                        help="hostname and port of the Chronos instance")
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--sync", metavar="/path/to/dir",
                       help="path to a directory containing json files describing chronos jobs. "
                            "All sub-directories will be searched for files ending in .json")
    group.add_argument("--list", action="store_true", help="list jobs on chronos")
    parser.add_argument("-n", action="store_true", default=False,
                        help="dry-run, don't actually push anything to chronos")
    args = parser.parse_args()

    c = chronos.connect(args.hostname)
    cjobs = c.list()

    # Every print below passes a single argument in parentheses, which
    # produces the same output on Python 2 and Python 3.
    if args.list:
        # cjobs isn't json but this still gets us the pretty
        print(json.dumps(cjobs, sort_keys=True, indent=4))
        sys.exit(0)

    if args.sync:
        jobs = {}
        retry = {'update': [], 'add': []}
        for job in cjobs:
            jobs[job["name"]] = job

        if not os.path.isdir(args.sync):
            raise Exception("%s must be a directory" % args.sync)

        for job_file in find_json_files(args.sync):
            job = read_job_file(job_file)
            if not job:
                print("Skipping %s" % job_file)
                continue

            if job['name'] not in jobs:
                print("Adding job %s from file %s" % (job['name'], job_file))
                if not args.n:
                    try:
                        c.add(job)
                    except Exception:
                        # Best effort: queue the job for the retry rounds.
                        # (A bare except would also trap Ctrl-C.)
                        retry['add'].append(job)
            elif check_update(jobs, job):
                print("Updating job %s from file %s" % (job['name'], job_file))
                if not args.n:
                    try:
                        c.update(job)
                    except Exception:
                        retry['update'].append(job)
            else:
                print("Job %s defined in %s is up-to-date on Chronos" % (job['name'], job_file))

        # Each round retries at most one pending update and one pending add.
        attempt = 0
        while (retry['update'] or retry['add']) and attempt < 10:
            attempt += 1
            if retry['update']:
                job = retry['update'].pop(0)
                try:
                    print("Retry %d for job %s" % (attempt, job['name']))
                    c.update(job)
                except Exception:
                    retry['update'].append(job)
            if retry['add']:
                job = retry['add'].pop(0)
                try:
                    print("Retry %d for job %s" % (attempt, job['name']))
                    c.add(job)
                except Exception:
                    retry['add'].append(job)

        if retry['update'] or retry['add']:
            # Sort by job name: ordering whole dicts only works on Python 2
            # and gives an arbitrary order there.
            failed = sorted(retry['update'] + retry['add'], key=lambda j: j['name'])
            print("Failed Jobs: %s" % failed)