def gen_payload(self, services=None):
    """Generate the JSON payload for an Ambari batch RequestSchedule that runs
    a service check for each of the given services.

    :param services: list of service names, or None / 'all' to use all services
                     returned by self.get_services()
    :returns: JSON string payload
    """
    log.debug('generating payload for services: %s', services)
    if services is None or services == 'all':
        services = self.get_services()
    if not isList(services):
        code_error('non-list passed to gen_payload')
    # determined from here:
    # https://community.hortonworks.com/questions/11111/is-there-a-way-to-execute-ambari-service-checks-in.html
    payload = [
        {
            "RequestSchedule": {
                "batch": [
                    {
                        "requests": []
                    },
                    {
                        "batch_settings": {
                            "batch_separation_in_seconds": 1,
                            "task_failure_tolerance": 1
                        }
                    }
                ]
            }
        }
    ]
    service_count = len(services)
    # order_id is 1-indexed - enumerate replaces the manual range()/index += 1 bookkeeping
    for index, service in enumerate(services, 1):
        if service.upper() == "ZOOKEEPER":
            # ZOOKEEPER service check command name is irregular
            # ZOOKEEPER_QUORUM_SERVICE_CHECK, not ZOOKEEPER_SERVICE_CHECK
            command = "{service}_QUORUM_SERVICE_CHECK".format(service=service.upper())
        else:
            command = "{service}_SERVICE_CHECK".format(service=service.upper())
        payload[0]['RequestSchedule']['batch'][0]['requests'].append(
            {
                "order_id": index,
                "type": "POST",
                "uri": "/api/v1/clusters/{0}/requests".format(self.cluster),
                "RequestBodyInfo": {
                    "RequestInfo": {
                        # command is already a str - the redundant
                        # "{commandData}".format(commandData=...) wrapper is removed
                        "command": command,
                        "context": "{service} Service Check (batch {index} of {total})".
                                   format(service=service, index=index, total=service_count)
                    },
                    "Requests/resource_filters": [
                        {
                            "service_name": service.upper()
                        }
                    ]
                }
            }
        )
    payload_str = json.dumps(payload)
    if log.isEnabledFor(logging.DEBUG):
        log.debug('generated payload:\n%s', jsonpp(payload_str))
    return payload_str
def validate_protocol_opts(self):
    """Resolve the --http / --https / --ping / --url-path options into
    self.protocol, self.port and self.url_path, enforcing the mutually
    exclusive combinations via self.usage()."""
    def opt_enabled(name):
        # option must be declared for this program before we can read it;
        # keeps the is_option_defined/get_opt short-circuit evaluation order
        return self.is_option_defined(name) and self.get_opt(name)
    if opt_enabled('https'):
        self.protocol = 'https'
        # optparse returns string, even though default we gave from __init__ was int
        # comparison would fail without this cast
        if str(self.port) == '80':
            log.info('overriding port 80 => 443 for https')
            self.port = 443
    elif opt_enabled('http'):
        self.protocol = 'http'
        if not self.port:
            self.port = 80
    if opt_enabled('url'):
        self.url_path = self.get_opt('url')
    if self.url_path:
        if self.protocol is None:
            self.protocol = 'http'
        elif self.protocol == 'ping':
            self.usage('cannot specify --url-path with --ping, mutually exclusive options!')
    if opt_enabled('ping'):
        if self.protocol:
            self.usage('cannot specify --ping with --http / --https, mutually exclusive tests!')
        elif self.port != self.default_port:
            self.usage('cannot specify --port with --ping, mutually exclusive options!')
        self.protocol = 'ping'
    if self.protocol and self.protocol not in ('http', 'https', 'ping'):
        code_error('invalid protocol, must be one of http / https / ping')
def get_ingestions(self, num=None, filter_opts=None):
    """Query Zaloni for ingestion history and return the parsed JSON response.

    :param num: explicit number of results to request (overrides defaults)
    :param filter_opts: optional dict of filter settings merged into the query
    :returns: parsed JSON dict
    """
    log.info('getting ingestion history')
    # pick the result limit: explicit request > filtered default > catch-all default
    if num:
        limit = num
        log.info('explicit number of results requested: %s', limit)
    elif filter_opts:
        limit = 10
        log.info('filters detected, defaulting number of results to %s', limit)
    else:
        limit = 100
        log.info('using catch all default result limit of %s', limit)
    settings = {'chunkSize': limit, 'currentPage': 1}
    if filter_opts is not None:
        if not isDict(filter_opts):
            code_error('passed non-dictionary for filter opts to get_ingestions')
        for key in sorted(filter_opts):
            log.info("filter: '%s' = '%s'", key, filter_opts[key])
        settings = merge_dicts(settings, filter_opts)
    log.info('settings: %s', settings)
    log.info('querying Zaloni for ingestion history')
    # orders by newest first, but seems to return last 10 anyway
    (req, self.query_time) = self.req(url='{url_base}/ingestion/publish/getFileIndex'
                                      .format(url_base=self.url_base),
                                      body=json.dumps(settings))
    try:
        log.info('parsing JSON response')
        json_dict = json.loads(req.content)
    except ValueError as _:
        qquit('UNKNOWN', 'error parsing json returned by Zaloni: {0}'.format(_))
    return json_dict
def print_log(self, build=None, job_id=None):
    """Print the log for a Travis CI job, either directly by job_id or by
    locating the relevant job from a build's job list.

    :param build: build dict (must contain 'jobs') used to determine the job
    :param job_id: explicit job id to print the log for (takes precedence)
    :raises UnknownError: if no job can be determined from the build
    """
    if job_id:
        self.print_job_log(job_id=job_id)
        log.info('=' * 80)
        log.info('end of log for job id %s', job_id)
        log.info('=' * 80 + '\n')
    else:
        if not build:
            code_error('no job id passed to print_log(), nor build to determine job from')
        log.info('getting job id for build #%s', build['number'])
        if 'jobs' not in build:
            raise UnknownError('no jobs field found in build, {0}'.format(support_msg_api))
        # bug fix: initialize job so the 'not job' check below raises the intended
        # UnknownError instead of NameError when no matching job is found
        job = None
        for _ in build['jobs']:
            _id = _['id']
            url = 'https://api.travis-ci.org/jobs/{id}'.format(id=_id)
            req = self.request_handler.get(url)
            # if this raises ValueError it'll be caught by run handler
            job_data = json.loads(req.content)
            if log.isEnabledFor(logging.DEBUG):
                log.debug("job id %s status:\n%s", _id, jsonpp(job_data))
            if self.failed is True:
                # when hunting for a failure, only accept a finished job with a failing status
                if job_data['state'] == 'finished' and job_data['status'] in (None, 1, '1'):
                    job = job_data
            else:
                job = job_data
        if not job:
            raise UnknownError('no job found in build {0}'.format(build['number']))
        self.print_job_log(job=job)
        log.info('=' * 80)
        log.info('end of log for build number #%s job id %s', build['number'], job['id'])
        log.info('=' * 80 + '\n')
def parse(self, req):
    """Parse the workers page HTML, extract our node's last heartbeat lag,
    check it against thresholds and populate self.msg (+ perfdata)."""
    soup = BeautifulSoup(req.content, 'html.parser')
    last_heartbeat = None
    try:
        self.list_workers(soup)
        header_cell = soup.find('th', text='Node Name').find_next_sibling()
        heartbeat_col_header = header_cell.get_text()
        # make sure ordering of columns is as we expect so we're parsing the correct number for heartbeat lag
        if heartbeat_col_header != 'Last Heartbeat':
            code_error("heartbeat column header '{}' != Last Heartbeat".format(heartbeat_col_header))
        node_cell = soup.find('th', text=self.node)
        last_heartbeat = node_cell.find_next_sibling().get_text()
        if last_heartbeat is None:
            raise AttributeError
    except (AttributeError, TypeError):
        raise CriticalError("{0} worker '{1}' not found among list of live workers!"\
                            .format(self.software, self.node))
    if not isInt(last_heartbeat):
        raise UnknownError("last heartbeat '{0}' for node '{1}' is not an integer, possible parsing error! {2}"\
                           .format(last_heartbeat, self.node, support_msg()))
    self.msg = "{0} worker '{1}' last heartbeat = {2} secs ago".format(self.software, self.node, last_heartbeat)
    self.check_thresholds(last_heartbeat)
    self.msg += ' | last_heartbeat={0}s{1}'.format(last_heartbeat, self.get_perf_thresholds())
def print_table_region_row_counts(self):
    """Print the per-region row count table, optionally sorted by count or
    by server (honouring --desc ordering)."""
    if self.sort:
        if self.sort == 'count':
            log.info('sorting output by counts')
            if self.sort_desc:
                self._regions_meta.sort(key=lambda _: -_['row_count'])
            else:
                self._regions_meta.sort(key=lambda _: _['row_count'])
        elif self.sort == 'server':
            log.info('sorting output by server')
            self._regions_meta.sort(key=lambda _: _['server'])
            if self.sort_desc:
                self._regions_meta.reverse()
        else:
            code_error('--sort was not either count or server')
    def print_col(value, width):
        # one fixed-width column followed by the separator, no newline
        print('{0:{1}}{2}'.format(value, width, self.separator), end='')
    print('=' * self.total_width)
    if not self.no_region_col:
        print_col(self.region_header, self.region_width)
    print_col(self.start_key_header, self.start_key_width)
    print_col(self.end_key_header, self.end_key_width)
    print_col(self.server_header, self.server_width)
    print('{0:{1}}{2}{3}'.format(self.row_count_header,
                                 self.row_count_width,
                                 self.separator,
                                 self.row_count_pc_header))
    print('=' * self.total_width)
    for region in self._regions_meta:
        if not self.no_region_col:
            print_col(region['name'], self.region_width)
        print_col(region['start_key'], self.start_key_width)
        print_col(region['end_key'], self.end_key_width)
        print_col(region['server'], self.server_width)
        print('{0:{1}}{2}{3:>10}'.format(region['row_count'],
                                         self.row_count_width,
                                         self.separator,
                                         region['pc']))
def gen_payload(self, services=None):
    """Generate the JSON payload for an Ambari batch RequestSchedule that runs
    a service check for each of the given services.

    :param services: list of service names, or None / 'all' to use all services
                     returned by self.get_services()
    :returns: JSON string payload
    """
    log.debug('generating payload for services: %s', services)
    if services is None or services == 'all':
        services = self.get_services()
    if not isList(services):
        code_error('non-list passed to gen_payload')
    # determined from here:
    # https://community.hortonworks.com/questions/11111/is-there-a-way-to-execute-ambari-service-checks-in.html
    payload = [
        {
            "RequestSchedule": {
                "batch": [
                    {
                        "requests": []
                    },
                    {
                        "batch_settings": {
                            "batch_separation_in_seconds": 1,
                            "task_failure_tolerance": 1
                        }
                    }
                ]
            }
        }
    ]
    service_count = len(services)
    # order_id is 1-indexed
    for index, service in enumerate(services, 1):
        if service.upper() == "ZOOKEEPER":
            # bug fix: ZOOKEEPER's service check command name is irregular -
            # Ambari expects ZOOKEEPER_QUORUM_SERVICE_CHECK, not ZOOKEEPER_SERVICE_CHECK
            command = "{service}_QUORUM_SERVICE_CHECK".format(service=service.upper())
        else:
            command = "{service}_SERVICE_CHECK".format(service=service.upper())
        payload[0]['RequestSchedule']['batch'][0]['requests'].append(
            {
                "order_id": index,
                "type": "POST",
                "uri": "/api/v1/clusters/{0}/requests".format(self.cluster),
                "RequestBodyInfo": {
                    "RequestInfo": {
                        "command": command,
                        "context": "{service} Service Check (batch {index} of {total})".
                                   format(service=service, index=index, total=service_count)
                    },
                    "Requests/resource_filters": [
                        {
                            "service_name": service.upper()
                        }
                    ]
                }
            }
        )
    payload_str = json.dumps(payload)
    if log.isEnabledFor(logging.DEBUG):
        log.debug('generated payload:\n%s', jsonpp(payload_str))
    return payload_str
def collect_results(self):
    """Drain one result per host from the queue, dispatching the first truthy
    result to self.finish() (tuple results are splatted as args)."""
    return_val = None
    # one get() per host - stop early as soon as any worker returns something truthy
    for _ in self.host_list:
        return_val = self.que.get()
        if return_val:
            break
    if not return_val:
        return
    if isTuple(return_val):
        self.finish(*return_val)
    elif isStr(return_val):
        self.finish(return_val)
    else:
        code_error('collect_results() found non-tuple / non-string on que')
def validate_options(self):
    """Validate host list, port, protocol, regex and thread count options,
    exiting via usage() on user error or code_error() on internal error."""
    if not self.host_list:
        self.usage('no hosts specified')
    validate_hostport_list(self.host_list, port_optional=True)
    validate_port(self.port)
    if self.protocol and self.protocol not in ('http', 'https', 'ping'):
        # bug fix: message previously said 'http or https' despite 'ping'
        # being an accepted value in the condition above
        code_error('invalid protocol, must be one of http / https / ping')
    if self.regex:
        if not self.protocol:
            self.usage('--regex cannot be used without --http / --https')
        validate_regex(self.regex)
        # pre-compile once for reuse by the per-host checks
        self.regex = re.compile(self.regex)
    validate_int(self.num_threads, 'num threads', 1, 100)
def collect_results(self):
    """Pull results off the queue, one per host, and act on the first truthy
    one: tuples are expanded into finish()'s arguments, strings passed as-is."""
    result = None
    for _ in self.host_list:
        result = self.queue.get()
        if result:
            break
    if result:
        if isTuple(result):
            self.finish(*result)
        elif isStr(result):
            self.finish(result)
        else:
            code_error('collect_results() found non-tuple / non-string on que')
def calculate_key_percentages(self):
    """Compute each key prefix's share of the total key count, storing it as a
    two-decimal string under each prefix's 'pc' field."""
    log.info('calculating key percentages')
    # totals are now accumulated incrementally elsewhere (one progress dot per 10k lines)
    # make sure we don't run in to division by zero error
    if self.total_keys == 0:
        die("0 total keys detected!")
    if self.total_keys < 0:
        code_error("negative total keys detected!")
    denominator = max(self.total_keys, 1)
    for info in self.keys.values():
        info['pc'] = '{0:.2f}'.format(info['count'] / denominator * 100)
def calculate_key_percentages(self):
    """Work out what percentage of all keys each key prefix represents and
    record it (two decimal places) under the prefix's 'pc' field."""
    log.info('calculating key percentages')
    # counts are accumulated incrementally now (one progress dot per 10k lines)
    # guard against division by zero
    if self.total_keys == 0:
        die("0 total keys detected!")
    if self.total_keys < 0:
        code_error("negative total keys detected!")
    for key_prefix in self.keys:
        count = self.keys[key_prefix]['count']
        self.keys[key_prefix]['pc'] = '{0:.2f}'.format(count / max(self.total_keys, 1) * 100)
def get_latest_complete_report(reports):
    """Return the newest report whose status is not INCOMPLETE, falling back
    to the newest report if every report is incomplete.

    :param reports: list of report dicts, ordered newest first
    :returns: a single report dict
    """
    if not isList(reports):
        # fixed typo in error message ('lastest' -> 'latest')
        code_error('non-list passed to get_latest_complete_report()')
    if not reports:
        qquit('UNKNOWN', 'no reports passed to get_latest_complete_report()')
    # bug fix: the previous while-loop restructured here could fall off the end
    # (returning None) or re-test reports[0] forever once all reports were
    # INCOMPLETE; a simple scan with an explicit fallback avoids both hazards
    for report in reports:
        if report['status'] != 'INCOMPLETE':
            return report
    # log.warn is a deprecated alias of log.warning
    log.warning('only incomplete workflows detected, will have to use latest incomplete workflow')
    return reports[0]
def run(self):
    """Determine the job id (explicit --job-id, or derived from the latest
    failed build of --repo), launch the job and exec ssh into its debug
    session. os.execvp replaces this process, so this method never returns."""
    if not self.job_id:
        if not self.repo:
            code_error('--job-id / --repo not specified, caught late')
        build = self.get_latest_failed_build()
        self.job_id = self.get_failing_job_id_from_build(build)
    if self.job_id is None:
        raise UnknownError('no job id was found, aborting getting SSH address')
    self.launch_job()
    ssh_target = self.get_ssh_address(job_id=self.job_id)
    log.info('Executing: ssh -- {0}'.format(ssh_target))
    sys.stdout.flush()
    sys.stderr.flush()
    self.disable_timeout()
    # replaces the current process image - nothing after this line runs
    os.execvp('ssh', ['--', ssh_target])
def exception_handler(self, arg):  # pylint: disable=no-self-use
    """Translate a requests/urllib exception into a CriticalError, adding a
    hint for the common SSL-vs-plaintext port mix-ups.

    :param arg: the exception instance to translate
    :raises CriticalError: always
    """
    if not issubclass(type(arg), Exception):
        code_error('RequestHandler.exception_handler arg {} is not a subclass of Exception'.format(arg))
    # TODO: improve this to extract connection refused for more concise errors
    # bug fix: arg.message does not exist on Python 3 exceptions and raised
    # AttributeError here; str(arg) carries the same information on both 2 and 3
    msg = str(arg)
    errhint = ''
    if 'BadStatusLine' in msg:
        errhint = ' (possibly connecting to an SSL secured port using plain HTTP?)'
    elif 'https://' in self.url and 'unknown protocol' in msg:
        errhint = ' (possibly connecting to a plain HTTP port with the -S / --ssl switch enabled?)'
    _type = type(arg).__name__
    if 'Connection refused' in msg:
        msg = 'Connection refused'
        #msg += to {host}:{port}'.format(arg.host, arg.port)
    raise CriticalError('{type}: {exception}{errhint}'.format(type=_type, exception=msg, errhint=errhint))
def check_last_ingest_age(self, results, max_age):
    """Check how long ago the most recent ingest started, setting warning
    state and extending self.msg if older than max_age minutes.

    :param results: list of ingest results, newest first
    :param max_age: threshold in minutes, or None to skip the age check
    :returns: timedelta since the last ingest started
    """
    log.info('checking last ingest age')
    if not isList(results):
        code_error('passed non-list to check_last_ingest_age()')
    # newest is first
    # effectiveDate is null in testing (docs says it's a placeholder for future use)
    # using ingestionTimeFormatted instead, could also use ingestionTime which is timestamp in millis
    ingestion_date = results[0]['ingestionTimeFormatted']
    age_timedelta = self.get_timedelta(ingestion_date=ingestion_date)
    # bug fix: timedelta.seconds is only the 0-86399 seconds component and
    # silently discards whole days - total_seconds() gives the real age
    age_secs = int(age_timedelta.total_seconds())
    if self.verbose:
        self.msg += ", last ingest start date = '{ingestion_date}'".format(
            ingestion_date=ingestion_date)
    self.msg += ', started {0} ago'.format(sec2human(age_secs))
    if max_age is not None and age_secs > (max_age * 60.0):
        self.warning()
        self.msg += ' (last run started more than {0} min{1} ago!)'.format(
            str(max_age).rstrip('0').rstrip('.'), plural(max_age))
    return age_timedelta
def request_service_checks(self, services):
    """Submit a batch RequestSchedule of Ambari service checks for the given
    services, optionally watching the scheduled request until completion.

    :param services: list of service names
    """
    log.debug('requesting service checks for services: %s', services)
    if not isList(services):
        code_error('non-list passed to request_service_checks')
    url_suffix = '/clusters/{cluster}/request_schedules'.format(cluster=self.cluster)
    payload = self.gen_payload(services)
    log.info('sending batch schedule check request for services: ' + ', '.join(services))
    content = self.post(url_suffix=url_suffix, data=payload)
    try:
        _ = json.loads(content)
        request_schedule_id = _['resources'][0]['RequestSchedule']['id']
        log.info('RequestSchedule %s submitted', request_schedule_id)
        href = _['resources'][0]['href']
        # bug fix: 'assert' is stripped under 'python -O' and raises
        # AssertionError, which the except clause below does not catch -
        # raise ValueError explicitly so the failure hits the die() path
        if href != self.url_base.rstrip('/') + '/clusters/{0}/request_schedules/{1}'\
           .format(self.cluster, request_schedule_id):
            raise ValueError('href does not match expected request_schedules URI!')
        if self.watch:
            self.watch_scheduled_request(request_schedule_id)
    except (KeyError, ValueError) as _:
        die('parsing schedule request response failed: ' + str(_) + '. ' + support_msg_api())
def check_last_ingest_age(self, results, max_age):
    """Check the age of the most recent ingest run against max_age (minutes),
    setting warning state and extending self.msg accordingly.

    :param results: list of ingest results, newest first
    :param max_age: threshold in minutes, or None to skip the check
    :returns: age of the last ingest in seconds
    """
    log.info('checking last ingest age')
    if not isList(results):
        code_error('passed non-list to check_last_ingest_age()')
    # newest is first
    # effectiveDate is null in testing (docs says it's a placeholder for future use)
    # using ingestionTimeFormatted instead, could also use ingestionTime which is timestamp in millis
    ingestion_date = results[0]['ingestionTimeFormatted']
    age_secs = self.timedelta_seconds(self.get_timedelta(ingestion_date=ingestion_date))
    if self.verbose:
        self.msg += ", last ingest start date = '{ingestion_date}'".format(ingestion_date=ingestion_date)
    self.msg += ', started {0} ago'.format(sec2human(age_secs))
    if max_age is not None and age_secs > (max_age * 60.0):
        self.warning()
        max_age_str = str(max_age).rstrip('0').rstrip('.')
        self.msg += ' (last run started more than {0} min{1} ago!)'.format(max_age_str, plural(max_age))
    return age_secs
def request_service_checks(self, services):
    """Schedule a batch of Ambari service checks for the given services and,
    if self.watch is set, follow the scheduled request to completion."""
    log.debug('requesting service checks for services: %s', services)
    if not isList(services):
        code_error('non-list passed to request_service_checks')
    url_suffix = '/clusters/{cluster}/request_schedules'.format(cluster=self.cluster)
    payload = self.gen_payload(services)
    log.info('sending batch schedule check request for services: ' + ', '.join(services))
    content = self.post(url_suffix=url_suffix, data=payload)
    try:
        response = json.loads(content)
        schedule = response['resources'][0]
        request_schedule_id = schedule['RequestSchedule']['id']
        log.info('RequestSchedule %s submitted', request_schedule_id)
        # sanity check the returned href points back at the schedule we created
        expected_href = self.url_base.rstrip('/') + \
                        '/clusters/{0}/request_schedules/{1}'.format(self.cluster, request_schedule_id)
        if schedule['href'] != expected_href:
            raise ValueError('href does not match expected request_schedules URI!')
        if self.watch:
            self.watch_scheduled_request(request_schedule_id)
    except (KeyError, ValueError) as _:
        die('parsing schedule request response failed: ' + str(_) + '. ' + support_msg_api())
def parse(self, req):
    """Scrape the live workers page for this node's last heartbeat lag,
    validate it and record the thresholded result in self.msg + perfdata."""
    soup = BeautifulSoup(req.content, 'html.parser')
    last_heartbeat = None
    try:
        self.list_workers(soup)
        heartbeat_col_header = soup.find('th', text='Node Name') \
                                   .find_next_sibling() \
                                   .get_text()
        # make sure ordering of columns is as we expect so we're parsing the correct number for heartbeat lag
        if heartbeat_col_header != 'Last Heartbeat':
            code_error("heartbeat column header '{}' != Last Heartbeat".format(heartbeat_col_header))
        last_heartbeat = soup.find('th', text=self.node) \
                             .find_next_sibling() \
                             .get_text()
        if last_heartbeat is None:
            raise AttributeError
    except (AttributeError, TypeError):
        raise CriticalError("{0} worker '{1}' not found among list of live workers!"\
                            .format(self.software, self.node))
    if not isInt(last_heartbeat):
        raise UnknownError("last heartbeat '{0}' for node '{1}' is not an integer, possible parsing error! {2}"\
                           .format(last_heartbeat, self.node, support_msg()))
    self.msg = "{0} worker '{1}' last heartbeat = {2} secs ago".format(self.software,
                                                                       self.node,
                                                                       last_heartbeat)
    self.check_thresholds(last_heartbeat)
    self.msg += ' | last_heartbeat={0}s{1}'.format(last_heartbeat, self.get_perf_thresholds())
def check_statuses(self, results):
    """Tally ingestion statuses and update plugin state + message.

    known statuses from doc: SUCCESS / INGESTION FAILED / WORKFLOW FAILED / INCOMPLETE

    :param results: list of ingestion result dicts (each with a 'status' key)
    :returns: dict mapping status -> occurrence count
    """
    log.info('checking statuses')
    result_statuses = {}
    num_results = len(results)
    for item in results:
        status = item['status']
        result_statuses[status] = result_statuses.get(status, 0) + 1
    if not result_statuses:
        code_error('no ingestion status results parsed')
    if 'SUCCESS' not in result_statuses:
        self.msg += 'NO SUCCESSFUL INGESTS in history of last {0} ingest runs! '.format(num_results)
        self.warning()
    self.msg += 'ingestion{0} status: '.format(plural(num_results))
    for status, count in result_statuses.items():
        # anything other than a success or an in-flight ingest is critical
        if status not in ('SUCCESS', 'INCOMPLETE'):
            self.critical()
        self.msg += '{0} = {1} time{2}, '.format(status, count, plural(count))
    self.msg = self.msg.rstrip(', ')
    return result_statuses
def req(self, url, method='post', body=None):
    """Issue an HTTP request to Zaloni, maintaining the JSESSIONID session
    cookie across calls.

    :param url: full URL to request
    :param method: HTTP verb name understood by the requests module (default 'post')
    :param body: optional request body
    :returns: (requests response object, elapsed request time in seconds)
    """
    if not isStr(method):
        code_error('non-string method passed to req()')
    log.debug('%s %s', method.upper(), url)
    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json",
        "JSESSIONID": self.jsessionid
    }
    log.debug('headers: %s', headers)
    start_time = time.time()
    try:
        request_func = getattr(requests, method.lower())
        req = request_func(url,
                           #cookies=self.jar,
                           data=body,
                           headers=headers)
        # refresh the session id whenever the server hands back a new cookie
        for (cookie_name, cookie_value) in req.cookies.items():
            if cookie_name == 'JSESSIONID':
                self.jsessionid = cookie_value.rstrip('/')
        timing = time.time() - start_time
    except requests.exceptions.RequestException as _:
        qquit('CRITICAL', _)
    if log.isEnabledFor(logging.DEBUG):
        log.debug("response: %s %s", req.status_code, req.reason)
        content = req.content
        try:
            content = jsonpp(req.content).strip()
        except ValueError:
            pass
        log.debug("content:\n%s\n%s\n%s", '=' * 80, content, '=' * 80)
    if req.status_code != 200:
        info = ''
        try:
            info = ': {0}'.format(json.loads(req.content)['result'])
        except (KeyError, ValueError):
            pass
        qquit('CRITICAL', "%s %s%s" % (req.status_code, req.reason, info))
    return (req, timing)
def print_job_log(self, job=None, job_id=None):
    """Print a Travis CI job's log to stdout, fetching the plaintext log by
    job id when the job dict does not already carry its log content."""
    #if (self.color or not self.plaintext) and 'log' in job:
    if not job and not job_id:
        code_error('no job data or job id passed to print_job_log()')
    content = None
    if job is not None:
        if job.get('log'):
            content = job['log']
        else:
            # fall back to fetching by the job's own id
            job_id = job['id']
    if not content:
        url = 'https://api.travis-ci.org/jobs/{id}/log.txt?deansi=true'.format(id=job_id)
        req = self.request_handler.get(url)
        content = req.content
    content = re.sub(r'\r', '', content)
    #if self.plaintext:
    # leaves a few characters behind which are printable
    #content = re.sub('[^{0}]'.format(string.printable), '', content)
    # mandatory stripping ANSI control sequences for now as although color coding is nice
    # Travis has too many other control sequences that mess up my terminal
    content = strip_ansi_escape_codes(content)
    print(content)
def req(self, url, method='post', body=None):
    """Send an HTTP request to the Zaloni API, carrying and refreshing the
    JSESSIONID session cookie.

    :param url: full URL to request
    :param method: HTTP verb name understood by the requests module (default 'post')
    :param body: optional request body
    :returns: (requests response object, elapsed request time in seconds)
    """
    if not isStr(method):
        code_error('non-string method passed to req()')
    log.debug('%s %s', method.upper(), url)
    headers = {"Content-Type": "application/json",
               "Accept": "application/json",
               "JSESSIONID": self.jsessionid}
    log.debug('headers: %s', headers)
    start_time = time.time()
    try:
        req = getattr(requests, method.lower())(url,
                                                #cookies=self.jar,
                                                data=body,
                                                headers=headers)
        for (name, value) in req.cookies.items():
            # server may rotate the session cookie - keep ours current
            if name == 'JSESSIONID':
                self.jsessionid = value.rstrip('/')
        timing = time.time() - start_time
    except requests.exceptions.RequestException as _:
        qquit('CRITICAL', _)
    separator = '=' * 80
    if log.isEnabledFor(logging.DEBUG):
        log.debug("response: %s %s", req.status_code, req.reason)
        content = req.content
        try:
            content = jsonpp(req.content).strip()
        except ValueError:
            pass
        log.debug("content:\n%s\n%s\n%s", separator, content, separator)
    if req.status_code != 200:
        info = ''
        try:
            info = ': {0}'.format(json.loads(req.content)['result'])
        except (KeyError, ValueError):
            pass
        qquit('CRITICAL', "%s %s%s" % (req.status_code, req.reason, info))
    return (req, timing)