def check(self, client):
     log.info('running Docker info')
     info = client.info()
     if log.isEnabledFor(logging.DEBUG):
         log.debug(jsonpp(info))
     containers = info['Containers']
     running_containers = info['ContainersRunning']
     paused_containers = info['ContainersPaused']
     stopped_containers = info['ContainersStopped']
     self.msg = 'Docker '
     if self.running:
         self.msg += 'running containers = {}'.format(running_containers)
         self.check_thresholds(running_containers)
         self.msg += ' | running_containers={}{}'.format(running_containers, self.get_perf_thresholds())
     elif self.paused:
         self.msg += 'paused containers = {}'.format(paused_containers)
         self.check_thresholds(paused_containers)
         self.msg += ' | paused_containers={}{}'.format(paused_containers, self.get_perf_thresholds())
     elif self.stopped:
         self.msg += 'stopped containers = {}'.format(stopped_containers)
         self.check_thresholds(stopped_containers)
         self.msg += ' | stopped_containers={}{}'.format(stopped_containers, self.get_perf_thresholds())
     elif self.total:
         self.msg += 'total containers = {}'.format(containers)
         self.check_thresholds(containers)
         self.msg += ' | total_containers={}{}'.format(containers, self.get_perf_thresholds())
     else:
         self.msg += 'containers = {}, running containers = {}, paused containers = {}, stopped containers = {}'\
                    .format(containers, running_containers, paused_containers, stopped_containers)
         self.msg += ' | containers={} running_containers={} paused_containers={} stopped_containers={}'\
                     .format(containers, running_containers, paused_containers, stopped_containers)
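A minimal stand-alone sketch of the summary branch above (the FakeClient class is hypothetical and only mimics the docker-py client.info() keys this check reads):

class FakeClient(object):
    def info(self):
        # same keys the check reads from the real Docker API
        return {'Containers': 12, 'ContainersRunning': 9,
                'ContainersPaused': 1, 'ContainersStopped': 2}

info = FakeClient().info()
print('Docker containers = {}, running containers = {}, paused containers = {}, stopped containers = {}'
      .format(info['Containers'], info['ContainersRunning'],
              info['ContainersPaused'], info['ContainersStopped']))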
 def connection(self, host, port, user, password, ssl=False, **kwargs):
     # must set X-Requested-By in newer versions of Ambari
     self.x_requested_by = user
     if user == 'admin':
         self.x_requested_by = os.getenv('USER', user)
     #log.info("contacting Ambari as '%s'" % self.user)
     if not isHost(host) or not isPort(port) or not isUser(user) or not password:
         raise InvalidOptionException('invalid options passed to AmbariBlueprint()')
     proto = 'http' # pylint: disable=unused-variable
     if ssl:
         proto = 'https'
     self.host = host
     self.port = port
     self.user = user
     self.password = password
     # if kwargs.has_key('strip_config') and kwargs['strip_config']:
     if 'strip_config' in kwargs and kwargs['strip_config']:
         self.strip_config = True
     self.url_base = '%(proto)s://%(host)s:%(port)s/api/v1' % locals()
     if 'dir' in kwargs and kwargs['dir']:
         self.blueprint_dir = kwargs['dir']
     if not isDirname(self.blueprint_dir):
         qquit('UNKNOWN', 'invalid dir arg passed to AmbariBlueprintTool')
     try:
         if not self.blueprint_dir or not os.path.exists(self.blueprint_dir):
             log.info("creating blueprint data dir '%s'" % self.blueprint_dir)
             os.mkdir(self.blueprint_dir)
         if not os.path.isdir(self.blueprint_dir):
              raise IOError("blueprint dir '%s' already exists and is not a directory" % self.blueprint_dir)
     except IOError as _:
         die("'failed to create dir '%s': %s" % (self.blueprint_dir, _))
 def output(self, connect_time, total_time):
     precision = self.precision
     cell_info = "HBase table '{0}' row '{1}' column '{2}'".format(self.table, self.row, self.column)
     value = self.value
     self.msg = "cell value = '{0}'".format(value)
     if isFloat(value):
         log.info('value is float, checking thresholds')
         self.check_thresholds(value)
     self.msg += " for {0}".format(cell_info)
     query_time = self.timings[self.column]['read']
     perfdata = ''
     perfdata += ' total_time={0:0.{precision}f}ms'.format(total_time, precision=precision)
     perfdata += ' connect_time={0:0.{precision}f}ms'.format(connect_time, precision=precision)
     perfdata += ' query_time={0:0.{precision}f}ms'.format(query_time, precision=precision)
     # show the timings at the end of the user output as well as in the graphing perfdata section
     self.msg += ',' + perfdata
     self.msg += ' |'
     if self.graph:
         if isFloat(value):
             self.msg += ' value={0}'.format(value)
             if self.units:
                 self.msg += str(self.units)
             self.msg += self.get_perf_thresholds()
         else:
             self.msg += ' value=NaN'
     self.msg += perfdata
 def parse_json(self, json_data):
     log.info('parsing response')
     try:
         bean = json_data['beans'][0]
         space_used_pc = bean['PercentUsed']
         # the way below is more informative
         #assert type(space_used_pc) == float
          # tiny residual values can come back in scientific notation (eg. 9.1e-08) - treat as zero
          if re.search(r'e-\d+$', str(space_used_pc)):
              space_used_pc = 0
         if not isFloat(space_used_pc):
             raise UnknownError("non-float returned for PercentUsed by namenode '{0}:{1}'"\
                                .format(self.host, self.port))
         assert space_used_pc >= 0
         stats = {}
         for stat in ('Total', 'TotalBlocks', 'TotalFiles', 'Used'):
             stats[stat] = bean[stat]
             if not isInt(stats[stat]):
                 raise UnknownError("non-integer returned for {0} by namenode '{1}:{2}'"\
                                    .format(stat, self.host, self.port))
             stats[stat] = int(stats[stat])
         self.ok()
         self.msg = 'HDFS space used = {0:.2f}% ({1}/{2})'\
                    .format(space_used_pc, humanize.naturalsize(stats['Used']), humanize.naturalsize(stats['Total']))
         self.check_thresholds(space_used_pc)
         self.msg += ", in {0:d} files spread across {1:d} blocks".format(stats['TotalFiles'], stats['TotalBlocks'])
         self.msg += " | 'HDFS % space used'={0:f}%{1}".format(space_used_pc, self.get_perf_thresholds())
         self.msg += " 'HDFS space used'={0:d}b".format(stats['Used'])
         self.msg += " 'HDFS file count'={0:d}".format(stats['TotalFiles'])
         self.msg += " 'HDFS block count'={0:d}".format(stats['TotalBlocks'])
     except KeyError as _:
         raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                            .format(self.host, self.port, _, support_msg_api()))
     except ValueError as _:
         raise UnknownError("invalid json returned for LiveNodes by Namenode '{0}:{1}': {2}"\
                            .format(self.host, self.port, _))
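For reference, a minimal sketch of the NameNode JMX payload shape this parser expects (field values are illustrative only):

sample_json = {
    'beans': [{
        'PercentUsed': 41.7,
        'Total': 1000000000,
        'TotalBlocks': 5000,
        'TotalFiles': 1200,
        'Used': 417000000,
    }]
}
bean = sample_json['beans'][0]
print('HDFS space used = {0:.2f}% ({1}/{2} bytes)'.format(
    bean['PercentUsed'], bean['Used'], bean['Total']))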
 def check_app_elapsed_times(self, app_list):
     num_apps_breaching_sla = 0
     max_elapsed = 0
     matching_apps = 0
     max_threshold_msg = ''
     # save msg as check_thresholds appends to it which we want to reset in this case
     msg = self.msg
     for app in app_list:
         if not self.app_selector(app):
             continue
         name = app['name']
         matching_apps += 1
         elapsed_time = app['elapsedTime']
         assert isInt(elapsed_time)
         elapsed_time = int(elapsed_time / 1000)
         threshold_msg = self.check_thresholds(elapsed_time)
         if threshold_msg:
             num_apps_breaching_sla += 1
             log.info("app '%s' is breaching SLA", name)
         if elapsed_time > max_elapsed:
             max_elapsed = elapsed_time
             max_threshold_msg = threshold_msg
     if max_threshold_msg:
         max_threshold_msg = ' ' + max_threshold_msg
     # restore msg prefix as check_thresholds appends every threshold breach
     self.msg = msg
     return (num_apps_breaching_sla, matching_apps, max_elapsed, max_threshold_msg)
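The fields read above correspond to the YARN ResourceManager apps REST API, where elapsedTime is reported in milliseconds; a minimal illustration of the per-app conversion (the sample record is made up):

app = {'name': 'my-spark-job', 'elapsedTime': 754000}  # milliseconds, per the YARN API
elapsed_secs = int(app['elapsedTime'] / 1000)
print(elapsed_secs)  # -> 754, the value compared against the SLA thresholds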
 def get_tables(self):
     log.info('getting table list')
     try:
         process = subprocess.Popen(['hbase', 'shell'], stdin=PIPE, stdout=PIPE, stderr=subprocess.STDOUT)
         (stdout, _) = process.communicate('list')
         process.wait()
         if process.returncode != 0:
             print('ERROR:', end='')
             die(stdout)
         lines = stdout.split('\n')
         lineno = 1
         for line in lines:
             if self.table_list_header_regex.search(line):
                 break
             lineno += 1
         if lineno > len(lines):
             die("Failed to parse table list output (couldn't find the starting line TABLE)")
         tables = set()
         for line in lines[lineno:]:
             if self.table_list_end_regex.search(line):
                 break
             line = line.strip()
             if not line:
                 continue
             tables.add(line)
         return tables
     except OSError as _:
         die("OSError running hbase shell to list tables: {0}".format(_))
     except subprocess.CalledProcessError as _:
         print('Failed to get tables using HBase shell:\n')
         print(_.output)
         sys.exit(_.returncode)
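The two regexes referenced above are defined elsewhere in the class; a plausible sketch, assuming the usual `hbase shell` 'list' output with a TABLE header line and an 'N row(s)' trailer:

import re

table_list_header_regex = re.compile(r'^TABLE$')
table_list_end_regex = re.compile(r'\d+\s+row\(s\)')

sample = ['HBase Shell; enter help<RETURN> for supported commands.',
          'TABLE', 't1', 't2', '2 row(s) in 0.3170 seconds']
start = next(i for i, line in enumerate(sample) if table_list_header_regex.search(line))
tables = set()
for line in sample[start + 1:]:
    if table_list_end_regex.search(line):
        break
    if line.strip():
        tables.add(line.strip())
print(tables)  # parsed table names: t1, t2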
 def run(self):
     tables = self.get_tables()
     if not tables:
         die('No Tables Found')
     if self.get_opt('list_tables'):
         print('Tables:\n\n' + '\n'.join(tables))
         sys.exit(3)
     tables_to_flush = set()
     if self.table_regex:
         log.info('filtering tables based on regex')
         for table in sorted(list(tables)):
             if self.table_regex.search(table):
                 tables_to_flush.add(table)
     else:
         tables_to_flush = sorted(list(tables))
     if log.isEnabledFor(logging.INFO):
         log.info('Flushing tables:\n\n%s\n', '\n'.join(tables_to_flush))
     flush_commands = '\n'.join(["flush '{0}'".format(table) for table in tables_to_flush])
     try:
         # by having stdout and stderr go to the same place more likely the output will be in a sane order
         process = subprocess.Popen(['hbase', 'shell'], stdin=PIPE, stdout=PIPE, stderr=subprocess.STDOUT)
         (stdout, _) = process.communicate(input=flush_commands)
         process.wait()
         if process.returncode != 0:
             print('ERROR:', end='')
             die(stdout)
         print(stdout)
     except OSError as _:
         die("OSError running hbase shell to flush tables: {0}".format(_))
     except subprocess.CalledProcessError as _:
          print('Failed to flush tables using HBase shell:\n')
         print(_.output)
         sys.exit(_.returncode)
 def run(self):
     expected = self.get_opt("expected")
     if expected is not None:
         validate_regex(expected)
         log.info("expected version regex: %s", expected)
     cmd = "consul version"
     log.debug("cmd: " + cmd)
     proc = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
     (stdout, _) = proc.communicate()
     log.debug("stdout: " + str(stdout))
     returncode = proc.wait()
     log.debug("returncode: " + str(returncode))
     if returncode != 0 or (stdout is not None and "Error" in stdout):
         raise CriticalError("consul returncode: {0}, output: {1}".format(returncode, stdout))
     version = None
     for line in str(stdout).split("\n"):
         match = self.version_regex.match(line)
         if match:
             version = match.group(1)
     if not version:
         raise UnknownError(
             "Consul version not found in output. Consul output may have changed. {0}".format(support_msg())
         )
     if not isVersion(version):
         raise UnknownError("Consul version unrecognized '{0}'. {1}".format(version, support_msg()))
     self.ok()
     self.msg = "Consul version = {0}".format(version)
     if expected is not None and not re.search(expected, version):
         self.msg += " (expected '{0}')".format(expected)
         self.critical()
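The version_regex attribute isn't shown above; a plausible definition, assuming 'consul version' prints a first line like 'Consul v1.2.3':

import re

version_regex = re.compile(r'^Consul v(\d+(?:\.\d+)*)')
match = version_regex.match('Consul v1.2.3')
print(match.group(1))  # -> 1.2.3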
 def run(self):
     start = time.time()
     self._read_value = self.read()
     stop = time.time()
     self._read_timing = stop - start
     log.info('read in %s secs', self._read_timing)
     log.info("value = '%s'", self._read_value)
 def check(self, client):
     log.info('running API ping')
     if client.ping():
         self.msg = 'Docker API Ping successful'
     else:
         self.critical()
         self.msg = 'Docker API Ping Failed'
 def run(self):
     expected = self.get_opt('expected')
     if expected is not None:
         validate_regex(expected)
         log.info('expected version regex: %s', expected)
     cmd = 'nodetool version'
     log.debug('cmd: ' + cmd)
     proc = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
     (stdout, _) = proc.communicate()
     log.debug('stdout: ' + str(stdout))
     returncode = proc.wait()
     log.debug('returncode: ' + str(returncode))
     if returncode != 0 or (stdout is not None and 'Error' in stdout):
         raise CriticalError('nodetool returncode: {0}, output: {1}'.format(returncode, stdout))
     version = None
     for line in str(stdout).split('\n'):
         match = self.version_regex.match(line)
         if match:
             version = match.group(1)
     if not version:
         raise UnknownError('Cassandra version not found in output. Nodetool output may have changed. {0}'.
                            format(support_msg()))
     if not isVersion(version):
         raise UnknownError('Cassandra version unrecognized \'{0}\'. {1}'.format(version, support_msg()))
     self.ok()
     self.msg = 'Cassandra version = {0}'.format(version)
     if expected is not None and not re.search(expected, version):
         self.msg += " (expected '{0}')".format(expected)
         self.critical()
 def run(self):
     if not self.args:
         self.usage('no git directory args given')
     self.origin = self.get_opt('origin')
     args = uniq_list_ordered(self.args)
     self.branch_prefix = self.get_opt('branch_prefix')
     if self.branch_prefix is not None:
         validate_regex(self.branch_prefix, 'branch prefix')
         self.branch_prefix = re.compile(self.branch_prefix)
     for arg in args:
         if not os.path.exists(arg):
             print("'%s' not found" % arg)
             sys.exit(ERRORS['WARNING'])
         if os.path.isfile(arg):
             log_option('file', arg)
         elif os.path.isdir(arg):
             log_option('directory', arg)
         else:
             die("path '%s' could not be determined as either a file or directory" % arg)
     for arg in args:
         self.check_git_branches_upstream(arg)
     if self.status == "OK":
         log.info('SUCCESS - All Git branches are tracking the expected upstream origin branches')
     else:
         log.critical('FAILED')
         sys.exit(ERRORS['CRITICAL'])
    def run(self):
        self.no_args()
        host = self.get_opt('host')
        port = self.get_opt('port')
        validate_host(host)
        validate_port(port)

        log.info('querying Tachyon Master')
        url = 'http://%(host)s:%(port)s/workers' % locals()
        log.debug('GET %s' % url)
        try:
            req = requests.get(url)
        except requests.exceptions.RequestException as _:
            qquit('CRITICAL', _)
        log.debug("response: %s %s" % (req.status_code, req.reason))
        log.debug("content:\n{0}\n{1}\n{2}".format('='*80, req.content.strip(), '='*80))
        if req.status_code != 200:
            qquit('CRITICAL', "%s %s" % (req.status_code, req.reason))
        soup = BeautifulSoup(req.content, 'html.parser')
        dead_workers = 0
        try:
            dead_workers = len([_ for _ in soup.find(id='data2').find('tbody').find_all('tr') if _])
        except (AttributeError, TypeError):
            qquit('UNKNOWN', 'failed to parse Tachyon Master info for dead workers')
        try:
            dead_workers = int(dead_workers)
        except (ValueError, TypeError):
            qquit('UNKNOWN', 'Tachyon Master dead workers parsing returned non-integer: {0}'.format(dead_workers))
        self.msg = 'Tachyon dead workers = {0}'.format(dead_workers)  # pylint: disable=attribute-defined-outside-init
        self.ok()
        # TODO: thresholds on number of dead workers (coming soon)
        if dead_workers:
            self.critical()
 def parse_json(self, json_data):
     if not isList(json_data):
         raise UnknownError('non-list returned by Presto for nodes. {0}'.format(support_msg_api()))
     nodes_lagging = []
     max_lag = 0
     re_protocol = re.compile('^https?://')
     num_nodes = len(json_data)
     for node_item in json_data:
         last_response_time = node_item['lastResponseTime']
         last_response_datetime = datetime.strptime(last_response_time, '%Y-%m-%dT%H:%M:%S.%fZ')
         timedelta = datetime.utcnow() - last_response_datetime
         response_age = int(timedelta.total_seconds())
         if response_age > max_lag:
             max_lag = response_age
         if response_age > self.max_age:
             uri = node_item['uri']
             uri = re_protocol.sub('', uri)
             nodes_lagging += [uri]
             log.info("node '%s' last response age %d secs > max age %s secs",
                      node_item['uri'], response_age, self.max_age)
         else:
             log.info("node '%s' last response age %d secs", node_item['uri'], response_age)
     num_nodes_lagging = len(nodes_lagging)
     self.msg = 'Presto SQL - worker nodes with response timestamps older than {0:d} secs = {1:d}'\
                .format(self.max_age, num_nodes_lagging)
     self.check_thresholds(num_nodes_lagging)
     self.msg += ' out of {0:d} nodes'.format(num_nodes)
     if num_nodes < 1:
         self.warning()
         self.msg += ' (< 1 worker found)'
     self.msg += ', current max response age = {0:.2f} secs'.format(max_lag)
     if self.verbose and nodes_lagging:
         self.msg += ' [{0}]'.format(', '.join(nodes_lagging))
     self.msg += ' | num_nodes_lagging={0}{1} max_response_age={2:.2f}s'\
                 .format(num_nodes_lagging, self.get_perf_thresholds(), max_lag)
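A sketch of the node record shape assumed by the parser above (field values are illustrative; lastResponseTime follows the ISO-8601-with-milliseconds format in the strptime call):

from datetime import datetime

node_item = {'uri': 'http://10.0.0.2:8080',
             'lastResponseTime': '2018-09-01T12:00:00.123Z'}
last = datetime.strptime(node_item['lastResponseTime'], '%Y-%m-%dT%H:%M:%S.%fZ')
response_age = int((datetime.utcnow() - last).total_seconds())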
 def check_git_branches_upstream(self, target):
     target = os.path.abspath(target)
     gitroot = find_git_root(target)
     if gitroot is None:
         die('Failed to find git root for target {0}'.format(target))
     log.debug("finding branches for target '{0}'".format(target))
     repo = git.Repo(gitroot)
     branches = repo.branches
     if self.branch_prefix is not None:
         log.debug('restricting to branches matching branch prefix')
         branches = [x for x in branches if self.branch_prefix.match(str(x))]
         if not branches:
             log.error("No branches matching '%s' for target '%s'", self.get_opt('branch_prefix'), target)
             self.status = 'NO BRANCHES'
     #if log.isEnabledFor(logging.DEBUG):
     #log.debug('\n\nbranches for target %s:\n\n%s\n', target, '\n'.join(list(branches)))
     for branch in branches:
         expected = '{0}/{1}'.format(self.origin, branch)
         tracking_branch = str(branch.tracking_branch())
         if tracking_branch == expected:
             log.info("OK: repo '{0}' branch '{1}' is tracking '{2}'"
                      .format(gitroot, branch, tracking_branch))
         else:
             self.status = "ERROR"
             log.error("BAD: branch '{0}' is tracking '{1}' (expected '{2}')"
                       .format(branch, tracking_branch, expected))
 def process_args(self):
     args = uniq_list_ordered(self.args)
     if not args:
         self.usage('no directories specified as arguments')
     log_option('directories', args)
     self.compare_by_name = self.get_opt('name')
     self.compare_by_size = self.get_opt('size')
     self.compare_by_checksum = self.get_opt('checksum')
     self.regex = self.get_opt('regex')
     self.quiet = self.get_opt('quiet')
     self.no_short_circuit = self.get_opt('no_short_circuit')
     self.include_dot_dirs = self.get_opt('include_dot_dirs')
     if self.regex:
         if '(' not in self.regex:
              log.info('no capture brackets in regex, will capture the entire given regex')
             self.regex = '(' + self.regex + ')'
         validate_regex(self.regex)
         self.re_compiled = re.compile(self.regex, re.I)
     if not (self.compare_by_name or self.compare_by_size or self.compare_by_checksum or self.regex):
         self.compare_by_name = True
         #self.compare_by_size = True
         self.compare_by_checksum = True
     log_option('compare by name', self.compare_by_name)
     log_option('compare by size', self.compare_by_size)
     log_option('compare by checksum', self.compare_by_checksum)
     log_option('compare by regex', True if self.regex else False)
     return args
 def check_version(self, filename, branch, branch_base, arg_var, found_version, branch_version):
     self.branches_dockerfile_checked.add(branch)
     self.dockerfiles_checked.add(filename)
     if arg_var:
         log.debug("found arg '%s'", arg_var)
         arg_version = "ARG '{0}={1}'".format(arg_var, found_version)
     else:
         arg_version = "'{0}'".format(found_version)
     #log.debug("arg '%s' matches branch base '%s'", argversion.group(1), branch_base)
     log.debug("comparing '%s' contents to version derived from branch '%s' => '%s'",
               filename, branch, branch_version)
      # note: lstrip() strips a character set, not a prefix - here it peels off a leading 'jdk' / 'jre'
      if not isVersion(branch_version.lstrip('jdk').lstrip('jre')):
         die("unrecognized branch version '{0}' for branch_base '{1}'"
             .format(branch_version, branch_base))
     #if branch_version == found_version or branch_version == found_version.split('.', 2)[0]:
     if found_version[0:len(branch_version)] == branch_version:
         log.info("{0} version '{1}' matches {2}".
                  format(self.valid_git_branches_msg, branch_version, arg_version))
     else:
         log.error("{0} version '{1}' vs Dockerfile {2}".
                   format(self.invalid_git_branches_msg, branch_version, arg_version))
         self.dockerfiles_failed += 1
         self.branches_failed.add(branch)
         return False
     return True
    def is_file_dup_by_hash(self, filepath):
        checksum = None
        size = self.is_file_dup_by_size(filepath)
        if size:
            log.info("found file '%s' of matching size '%s' bytes", filepath, size)
            checksum = self.hash(filepath)
            self.sizes[size][filepath] = checksum
            self.hashes[checksum] = self.hashes.get(checksum, set())
            self.hashes[checksum].add(filepath)
        else:
            self.sizes[size] = {}
            self.sizes[size][filepath] = None

        sizeitem = self.sizes[size]
        if len(sizeitem) < 2:
            pass
        elif len(sizeitem) == 2:
            for filepath in sizeitem:
                if sizeitem[filepath] is None:
                    log.info("backtracking to now hash first file '%s'", filepath)
                    checksum = self.hash(filepath)
                    sizeitem[filepath] = checksum
                    self.hashes[checksum] = self.hashes.get(checksum, set())
                    self.hashes[checksum].add(filepath)
        if checksum is not None and len(self.hashes[checksum]) > 1:
            self.dups_by_hash[checksum] = self.dups_by_hash.get(checksum, set())
            for filepath in self.hashes[checksum]:
                self.dups_by_hash[checksum].add(filepath)
            return True
        return False
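The bookkeeping above relies on two dicts maintained elsewhere in the class: self.sizes maps a byte size to {filepath: checksum-or-None}, and self.hashes maps a checksum to the set of file paths sharing it. An illustrative snapshot after two same-sized files have been hashed (values are made up):

sizes = {1024: {'/tmp/a': 'd41d8cd9', '/tmp/b': 'd41d8cd9'}}
hashes = {'d41d8cd9': {'/tmp/a', '/tmp/b'}}
# more than one path per checksum is what flags the duplicate
print(len(hashes['d41d8cd9']) > 1)  # -> True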
    def run(self):
        self.no_args()
        host = self.get_opt('host')
        port = self.get_opt('port')
        validate_host(host)
        validate_port(port)

        log.info('querying Tachyon Master')
        url = 'http://%(host)s:%(port)s/home' % locals()
        log.debug('GET %s' % url)
        try:
            req = requests.get(url)
        except requests.exceptions.RequestException as _:
            qquit('CRITICAL', _)
        log.debug("response: %s %s" % (req.status_code, req.reason))
        log.debug("content:\n{0}\n{1}\n{2}".format('='*80, req.content.strip(), '='*80))
        if req.status_code != 200:
            qquit('CRITICAL', "Non-200 response! %s %s" % (req.status_code, req.reason))
        soup = BeautifulSoup(req.content, 'html.parser')
        try:
            running_workers = soup.find('th', text=re.compile(r'Running\s+Workers:?', re.I))\
                .find_next_sibling().get_text()
        except (AttributeError, TypeError):
            qquit('UNKNOWN', 'failed to parse Tachyon Master info for running workers')
        try:
            running_workers = int(running_workers)
        except (ValueError, TypeError):
            qquit('UNKNOWN', 'Tachyon Master live workers parsing returned non-integer: {0}'.format(running_workers))
        self.msg = 'Tachyon running workers = {0}'.format(running_workers)  # pylint: disable=attribute-defined-outside-init
        self.ok()
        # TODO: thresholds on number of live workers (coming soon)
        if running_workers < 1:
            self.critical()
    def run(self):
        self.no_args()
        host = self.get_opt('host')
        port = self.get_opt('port')
        validate_host(host)
        validate_port(port)
        self.validate_thresholds(integer=False)

        url = 'http://%(host)s:%(port)s/master-status' % locals()
        log.debug('GET %s', url)
        try:
            req = requests.get(url)
        except requests.exceptions.RequestException as _:
            qquit('CRITICAL', _)
        log.debug("response: %s %s", req.status_code, req.reason)
        log.debug("content:\n%s\n%s\n%s", '='*80, req.content.strip(), '='*80)
        if req.status_code != 200:
            qquit('CRITICAL', ("%s %s" % (req.status_code, req.reason)))
        self.parse_output(req.content)
        log.info('server with min regions = %s regions on %s', self.server_min_regions[1], self.server_min_regions[0])
        log.info('server with max regions = %s regions on %s', self.server_max_regions[1], self.server_max_regions[0])
        imbalance = self.calculate_imbalance()
        self.msg = '{0}% region imbalance'.format(imbalance)
        self.check_thresholds(imbalance)
        self.msg += ' between HBase RegionServers hosting the most vs least number of regions'
        self.msg += ' (min = {0}, max = {1})'.format(self.server_min_regions[1], self.server_max_regions[1])
        self.msg += " | '% region imbalance'={0}%".format(imbalance)
        self.msg += self.get_perf_thresholds()
        self.msg += ' min_regions={0} max_regions={1}'.format(self.server_min_regions[1], self.server_max_regions[1])
 def consume(self):
     self.check_connection()
     self.check_channel()
     def connection_timeout_handler():
         raise CriticalError("unique message not returned on queue '{queue}' within {secs:.2f} secs"\
                             .format(queue=self.queue, secs=self.timeout / 3) + \
                             ", consumer timed out while consuming messages from {name} broker '{host}:{port}'"\
                             .format(name=self.name, host=self.host, port=self.port))
     self.conn.add_timeout(self.timeout / 3, connection_timeout_handler)
     # don't re-declare, queue should still exist otherwise error out
     #channel.queue_declare(queue = 'hello')
     # don't ack as messages could stay in queue indefinitely
     self.consumer_tag = self.channel.basic_consume(self.consumer_callback,
                                                    queue=self.queue,
                                                    # let broker autogenerate consumer_tag
                                                    # consumer_tag = self.consumer_tag),
                                                    no_ack=self.no_ack
                                                   )
     # could also use non-callback mechanism - generator that yields tuples (method, properties, body)
     # requires self.channel.cancel() from within loop
     # self.channel.consume(self.queue,
     #                      no_ack = True,
     #                      exclusive = True,
     #                      arguments = None,
     #                      inactivity_timeout = self.timeout/3)
     log.debug('start consuming')
     self.channel.start_consuming()
     # could instead use basic_get to return single message
     # self.channel.basic_get(queue = self.queue, no_ack = True)
     log.info('closing connection to broker')
     self.conn.close(reply_code=200, reply_text='Normal shutdown')
     return self.consumed_message
 def run(self):
     self.no_args()
     directory = self.get_opt('directory')
     validate_directory(directory)
     directory = os.path.abspath(directory)
     self.remote = self.get_opt('remote')
     validate_chars(self.remote, 'remote', r'A-Za-z0-9_\.-')
     try:
         repo = git.Repo(directory)
     except InvalidGitRepositoryError as _:
         raise CriticalError("directory '{}' does not contain a valid Git repository!".format(directory))
     try:
         if not self.get_opt('no_fetch'):
             log.info('fetching from remote repo: {}'.format(self.remote))
             repo.git.fetch(self.remote)
         branch = repo.active_branch
         log.info('active branch: %s', branch)
         commits_behind = repo.iter_commits('{branch}..{remote}/{branch}'.format(remote=self.remote, branch=branch))
         commits_ahead = repo.iter_commits('{remote}/{branch}..{branch}'.format(remote=self.remote, branch=branch))
         num_commits_behind = sum(1 for c in commits_behind)
         num_commits_ahead = sum(1 for c in commits_ahead)
     # happens with detached HEAD checkout like Travis CI does
     except TypeError as _:
         raise CriticalError(_)
     except GitCommandError as _:
         raise CriticalError(', '.join(str(_.stderr).split('\n')))
     self.msg = "git checkout branch '{}' is ".format(branch)
     if num_commits_ahead + num_commits_behind == 0:
         self.ok()
         self.msg += 'up to date with'
     else:
         self.critical()
         self.msg += '{} commits behind, {} commits ahead of'.format(num_commits_behind, num_commits_ahead)
     self.msg += " remote '{}'".format(self.remote)
     self.msg += ' | commits_behind={};0;0 commits_ahead={};0;0'.format(num_commits_behind, num_commits_ahead)
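A stand-alone sketch of the same ahead/behind counting with GitPython (assumes the current directory is a git checkout with an 'origin' remote that has already been fetched):

import git

repo = git.Repo('.')
branch = repo.active_branch
behind = sum(1 for _ in repo.iter_commits('{b}..origin/{b}'.format(b=branch)))
ahead = sum(1 for _ in repo.iter_commits('origin/{b}..{b}'.format(b=branch)))
print('behind={0} ahead={1}'.format(behind, ahead))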
 def check_http(self, host, port, url_path=''):
     if not isStr(url_path):
         url_path = ''
     url = '{protocol}://{host}:{port}/{url_path}'.format(protocol=self.protocol,
                                                          host=host,
                                                          port=port,
                                                          url_path=url_path.lstrip('/'))
     log.info('GET %s', url)
     try:
         # timeout here isn't total timeout, it's response time
         req = requests.get(url, timeout=self.request_timeout)
     except requests.exceptions.RequestException:
         return False
     except IOError:
         return False
     log.debug("%s - response: %s %s", url, req.status_code, req.reason)
     log.debug("%s - content:\n%s\n%s\n%s", url, '='*80, req.content.strip(), '='*80)
     if req.status_code != 200:
         return None
     if self.regex:
         log.info('%s - checking regex against content', url)
         if self.regex.search(req.content):
             log.info('%s - regex matched http output', url)
         else:
             log.info('%s - regex did not match http output', url)
             return None
     log.info("%s - passed all checks", url)
     return (host, port)
 def check_media_file(self, filename):
     valid_media_msg = '%s => OK' % filename
     invalid_media_msg = '%s => INVALID' % filename
     try:
         # cmd = self.validate_cmd.format(filename)
         cmd = self.validate_cmd
         log.debug('cmd: %s %s', cmd, filename)
         log.info('verifying {0}'.format(filename))
         # capturing stderr to stdout because ffprobe prints to stderr in all cases
         # Python 2.7+
         #subprocess.check_output(cmd.split() + [filename], stderr=subprocess.STDOUT)
         proc = subprocess.Popen(cmd.split() + [filename], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
         (stdout, _) = proc.communicate()
         returncode = proc.wait()
         if returncode != 0 or (stdout is not None and 'Error' in stdout):
             _ = CalledProcessError(returncode, cmd)
             _.output = stdout
             raise _
         print(valid_media_msg)
     except CalledProcessError as _:
         if self.verbose > 2:
             print(_.output)
         if self.skip_errors:
             print(invalid_media_msg)
             self.failed = True
             return False
         die(invalid_media_msg)
 def run(self):
     version = self.get_version()
     log.info("got version '%s'", version)
     self.check_version(version)
     extra_info = self.extra_info()
     if extra_info:
         self.msg += extra_info
 def parse_json(self, json_data):
     log.info('parsing response')
     try:
         data = json_data['beans'][0]
         name_dir_statuses = data['NameDirStatuses']
         name_dir_data = json.loads(name_dir_statuses)
         active_dirs = name_dir_data['active']
         failed_dirs = name_dir_data['failed']
         num_active_dirs = len(active_dirs)
         num_failed_dirs = len(failed_dirs)
         self.msg = 'NameNode has {0} failed dir{1}'.format(num_failed_dirs, plural(num_failed_dirs))
         if num_failed_dirs > 0:
             self.warning()
             if self.verbose:
                 self.msg += ' ({0})'.format(', '.join(failed_dirs))
         self.msg += ', {0} active dir{1}'.format(num_active_dirs, plural(num_active_dirs))
         if num_active_dirs < 1:
             self.critical()
         if self.verbose and num_active_dirs > 0:
             self.msg += ' ({0})'.format(', '.join(active_dirs))
         self.msg += ' | num_failed_dirs={0} num_active_dirs={1}'.format(num_failed_dirs, num_active_dirs)
     except KeyError as _:
         raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                            .format(self.host, self.port, _, support_msg_api()))
     except ValueError as _:
         raise UnknownError("invalid json returned for NameDirStatuses by Namenode '{0}:{1}': {2}"\
                            .format(self.host, self.port, _))
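Note that NameDirStatuses arrives as a JSON string embedded inside the JMX JSON, hence the second json.loads(); an illustrative payload (the directory path and state are made up):

import json

bean = {'NameDirStatuses': '{"active":{"/hadoop/dfs/name":"IMAGE_AND_EDITS"},"failed":{}}'}
statuses = json.loads(bean['NameDirStatuses'])
print(len(statuses['active']), len(statuses['failed']))  # -> 1 0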
 def connect(self):
     log.info('connecting to HBase Thrift Server at %s:%s', self.host, self.port)
     try:
         # cast port to int to avoid low level socket module TypeError for ports > 32000
         self.conn = happybase.Connection(host=self.host, port=int(self.port), timeout=10 * 1000)  # ms
     except (socket.error, socket.timeout, ThriftException, HBaseIOError) as _:
         qquit('CRITICAL', 'error connecting: {0}'.format(_))
 def run(self):
     ###############
     # == Write == #
     start = time.time()
     self.write()
     end = time.time()
     self._write_timing = end - start
     log.info("read in %s secs", self._read_timing)
     ##############
     # == Read == #
     # Python 2.x
     super(KeyWriteNagiosPlugin, self).run()
     # Python 3.x
     # super().run()
     if self._read_value != self._write_value:
         raise CriticalError(
             "read back value '%s' does not match written value '%s'!" % (self._read_value, self._write_value)
         )
     ################
     # == Delete == #
     start = time.time()
     self.delete()
     end = time.time()
     self._delete_timing = end - start
     log.info("read in %s secs", self._read_timing)
    def run(self):
        self.no_args()
        host = self.get_opt('host')
        port = self.get_opt('port')
        user = self.get_opt('user')
        password = self.get_opt('password')
        if self.get_opt('ssl'):
            self.protocol = 'https'
        history_mins = self.get_opt('history_mins')
        num = self.get_opt('num')
        #inventory_id = self.get_opt('id')
        source = self.get_opt('source')
        dest = self.get_opt('dest')
        max_age = self.get_opt('max_age')
        max_runtime = self.get_opt('max_runtime')
        validate_host(host)
        validate_port(port)
        validate_user(user)
        validate_password(password)
        validate_float(history_mins, 'history mins')
        self.history_mins = float(history_mins)
        filter_opts = {}
        if self.history_mins:
            now = datetime.now()
            filter_opts['dateRangeStart'] = datetime.strftime(now - timedelta(minutes=self.history_mins), '%F %H:%M:%S')
            filter_opts['dateRangeEnd'] = datetime.strftime(now, '%F %H:%M:%S')
        if num is not None:
            validate_int(num, 'num ingestions', 1)
        #if inventory_id is not None:
        #    validate_chars(inventory_id, 'ingestion id', r'\w-')
        #    filter_opts['inventoryId'] = inventory_id
        if source is not None:
            log_option('source', source)
            filter_opts['fileName'] = source
        if dest is not None:
            log_option('dest', dest)
            filter_opts['destinationPath'] = dest
        if max_age is not None:
            validate_float(max_age, 'max age', 1)
            max_age = float(max_age)
        if max_runtime is not None:
            validate_float(max_runtime, 'max incomplete runtime', 1)
            max_runtime = float(max_runtime)

        self.url_base = '{protocol}://{host}:{port}/bedrock-app/services/rest'.format(host=host,
                                                                                      port=port,
                                                                                      protocol=self.protocol)
        # auth first, get JSESSIONID cookie
        # cookie jar doesn't work in Python or curl, must extract JSESSIONID to header manually
        #self.jar = cookielib.CookieJar()
        log.info('authenticating to Zaloni Bedrock')
        (_, self.auth_time) = self.req(url='{url_base}/admin/getUserRole'.format(url_base=self.url_base),
                                       # using json instead of constructing string manually,
                                       # this correctly escapes backslashes in password
                                       body=json.dumps({"username": user, "password": password}))
        if self.get_opt('list'):
            self.list_ingestions(num=num)

        self.check_ingestion(num=num, filter_opts=filter_opts, max_age=max_age, max_runtime=max_runtime)
 def run(self):
     log.info("querying %s", self.software)
     url = "{protocol}://{host}:{port}/PolicyManagement/{api_version}/deployments".format(
         host=self.host, port=self.port, api_version=self.api_version, protocol=self.protocol
     )
     log.debug("GET %s", url)
     try:
         req = requests.get(url, auth=HTTPBasicAuth(self.user, self.password))
     except requests.exceptions.RequestException as _:
         errhint = ""
         if "BadStatusLine" in str(_.message):
             errhint = " (possibly connecting to an SSL secured port without using --ssl?)"
         elif self.protocol == "https" and "unknown protocol" in str(_.message):
             errhint = " (possibly connecting to a plain HTTP port with the -S / --ssl switch enabled?)"
         qquit("CRITICAL", str(_) + errhint)
     log.debug("response: %s %s", req.status_code, req.reason)
     log.debug("content:\n%s\n%s\n%s", "=" * 80, req.content.strip(), "=" * 80)
     if req.status_code == 400 and req.reason == "Bad Request":
         qquit(
             "CRITICAL",
             "{0}: {1} (possibly new install with no deployments yet?)".format(req.status_code, req.reason),
         )
     if req.status_code != 200:
         qquit("CRITICAL", "{0}: {1}".format(req.status_code, req.reason))
     try:
         json_list = json.loads(req.content)
         if log.isEnabledFor(logging.DEBUG):
             print(jsonpp(json_list))
             print("=" * 80)
         if not isList(json_list):
             raise ValueError("returned content is not a list")
         if not json_list:
             qquit("UNKNOWN", "no deployments found")
         last_deployment = json_list[0]
         userid = last_deployment["UserId"]
         description = last_deployment["Description"]
         hostname = last_deployment["HostName"]
         timestamp = last_deployment["timestamp"]
          # the timestamp uses a 12-hour clock, so %I (not %H) is required for %p (AM/PM) to take effect
          last_deploy_datetime = datetime.strptime(timestamp, "%b %d, %Y %I:%M:%S %p")
     except (KeyError, ValueError) as _:
         qquit(
             "UNKNOWN",
             "error parsing output from {software}: {exception}: {error}. {support_msg}".format(
                 software=self.software, exception=type(_).__name__, error=_, support_msg=support_msg_api()
             ),
         )
     timedelta = datetime.now() - last_deploy_datetime
     mins = int(int(timedelta.total_seconds()) / 60)
     self.msg = "{software} last deployment was at '{timestamp}', {mins} mins ago".format(
         software=self.software, timestamp=timestamp, mins=mins
     )
     self.check_thresholds(mins)
     if self.verbose:
         self.msg += " by user '{userid}', host = '{hostname}', description = '{description}'".format(
             userid=userid, hostname=hostname, description=description
         )
     self.msg += " | mins_since_last_deployment={mins}{thresholds}".format(
         mins=mins, thresholds=self.get_perf_thresholds(boundary="lower")
     )
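A quick stand-alone check of the 12-hour timestamp parsing used above (the sample timestamp is illustrative):

from datetime import datetime

dt = datetime.strptime('Jan 4, 2017 02:24:32 PM', '%b %d, %Y %I:%M:%S %p')
print(dt.hour)  # -> 14, since %I honours the AM/PM marker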
    def run(self):
        job_id = self.get_opt('job_id')
        travis_token = self.get_opt('travis_token')
        if job_id is None:
            job_id = os.getenv('JOB_ID')
        if travis_token is None:
            travis_token = os.getenv('TRAVIS_TOKEN')
        #if travis_token is None:
        #    self.usage('--travis-token option or ' +
        #               '$TRAVIS_TOKEN environment variable required to authenticate to the API')
        validate_chars(job_id, 'job id', '0-9')
        validate_alnum(travis_token, 'travis token')

        headers = {
            'Content-Type': 'application/json',
            'Accept': 'application/json',
            'Travis-API-Version': '3',
            'Authorization': 'token {0}'.format(travis_token)
        }
        log.info('triggering debug job {job_id}'.format(job_id=job_id))
        url = 'https://api.travis-ci.org/job/{job_id}/debug'.format(
            job_id=job_id)
        log.debug('POST %s' % url)
        try:
            req = requests.post(url, headers=headers)
        except requests.exceptions.RequestException as _:
            raise CriticalError(_)
        log.debug("response: %s %s", req.status_code, req.reason)
        log.debug("content:\n%s\n%s\n%s", '=' * 80, req.content.strip(),
                  '=' * 80)
        if req.status_code == 409:
            error_message = ''
            try:
                _ = json.loads(req.content)
                error_message = _['error_message']
            except ValueError:
                pass
            error_message += (
                " (if you've just retriggered this you can avoid this error " +
                "using the --ignore-running switch)")
            if self.get_opt('ignore_running'):
                log.info('job already running (ignoring)')
            else:
                log.info('job already running')
                raise CriticalError('{0} {1}: {2}'.format(
                    req.status_code, req.reason, error_message))
        elif req.status_code != 202:
            raise CriticalError("%s %s" % (req.status_code, req.reason))

        # don't need to query this if using the API address rather than the web UI address
        # as we don't need to figure out the repo name, just use the job id by itself


#        url = 'https://api.travis-ci.org/job/{job_id}'.format(job_id=job_id)
#        log.debug('GET %s' % url)
#        try:
#            req = requests.get(url, headers=headers)
#        except requests.exceptions.RequestException as _:
#            raise CriticalError(_)
#        log.debug("response: %s %s", req.status_code, req.reason)
#        log.debug("content:\n%s\n%s\n%s", '='*80, req.content.strip(), '='*80)
#        if req.status_code != 200:
#            raise CriticalError("%s %s" % (req.status_code, req.reason))
#
#        repo = None
#        try:
#            repo = json.loads(req.content)['repository']['slug']
#        except ValueError as _:
#            raise

        ssh_address = self.get_ssh_address(job_id=job_id)
        log.info('Executing: ssh -- {0}'.format(ssh_address))
        sys.stdout.flush()
        sys.stderr.flush()
        self.disable_timeout()
        # argv[0] must be the program name; '--' then stops ssh parsing the address as an option
        os.execvp('ssh', ['ssh', '--', ssh_address])
    def run(self):
        csv_file = self.get_opt('csv')
        avro_dir = self.get_opt('avro_dir')
        has_header = self.get_opt('has_header')
        # I don't know why the Spark guys made this a string instead of a bool
        header_str = 'false'
        if has_header:
            header_str = 'true'
        schema = self.get_opt('schema')
        # let Spark fail if csv/avro dir aren't available
        # can't check paths exist as want to remain generically portable
        # to HDFS, local filesystem or any other uri scheme Spark supports
        log.info("CSV Source: %s" % csv_file)
        log.info("Avro Destination: %s" % avro_dir)

        if schema:

            def get_type(arg):
                arg = str(arg).lower()
                if arg not in self.types_mapping:
                    self.usage(
                        "invalid type '%s' defined in --schema, must be one of: %s"
                        % (arg, ', '.join(sorted(self.types_mapping.keys()))))
                # return self.types_mapping[arg]
                module = __import__('pyspark.sql.types', globals(), locals(),
                                    ['types'], -1)
                class_ = getattr(module, self.types_mapping[arg])
                _ = class_()
                return _

            def create_struct(arg):
                name = str(arg).strip()
                data_type = 'string'
                if ':' in arg:
                    (name, data_type) = arg.split(':', 1)
                data_class = get_type(data_type)
                return StructField(name, data_class, True)

            # see https://github.com/databricks/spark-csv#python-api
            self.schema = StructType(
                [create_struct(_) for _ in schema.split(',')])
            log.info('generated CSV => Spark schema')

        conf = SparkConf().setAppName('HS PySpark CSV => Avro')
        sc = SparkContext(conf=conf)  # pylint: disable=invalid-name
        if self.verbose < 3 and 'setLogLevel' in dir(sc):
            sc.setLogLevel('WARN')
        sqlContext = SQLContext(sc)  # pylint: disable=invalid-name
        spark_version = sc.version
        log.info('Spark version detected as %s' % spark_version)

        if not isVersionLax(spark_version):
            die("Spark version couldn't be determined. " +
                support_msg('pytools'))

        #  pylint: disable=invalid-name
        df = None
        if isMinVersion(spark_version, 1.4):
            if has_header and not schema:
                log.info('inferring schema from CSV headers')
                df = sqlContext.read.format('com.databricks.spark.csv')\
                     .options(header=header_str, inferschema='true')\
                     .load(csv_file)
            else:
                log.info('using explicitly defined schema')
                schema = self.schema
                df = sqlContext.read\
                     .format('com.databricks.spark.csv')\
                     .options(header=header_str)\
                     .load(csv_file, schema=schema)
        else:
            die('Spark <= 1.3 is not supported due to avro dependency, sorry! ' + \
                'I may change this on request but prefer people just upgrade')
            # log.warn('running legacy code for Spark <= 1.3')
            # if has_header and not schema:
            #     log.info('inferring schema from CSV headers')
            #     df = sqlContext.load(source="com.databricks.spark.csv", path=csv_file,
            #                          header=header_str, inferSchema='true')
            # elif self.schema:
            #     log.info('using explicitly defined schema')
            #     df = sqlContext.load(source="com.databricks.spark.csv", path=csv_file,
            #                          header=header_str, schema=self.schema)
            # else:
            #     die('no header and no schema, caught late')
        # this doesn't work in Spark <= 1.3 and the github docs don't mention the older methods for writing avro using
        # the databricks avro driver
        df.write.format('com.databricks.spark.avro').save(avro_dir)
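An illustrative --schema value and the StructType the nested helpers above would generate for it (assuming types_mapping maps names like 'int' to 'IntegerType'; requires pyspark on the PYTHONPATH):

from pyspark.sql.types import StructField, StructType, StringType, IntegerType

# e.g. --schema 'name,age:int'
schema = StructType([
    StructField('name', StringType(), True),   # no ':type' suffix defaults to string
    StructField('age', IntegerType(), True),   # explicit 'age:int'
])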
    def parse_results(self, content):
        build = self.get_latest_build(content)

        number = build['number']
        log.info('build number = %s', number)
        if not isInt(number):
            raise UnknownError('build number returned is not an integer!')

        message = build['message']
        log.info('message = %s', message)

        branch = build['branch']
        log.info('branch = %s', branch)

        commit = build['commit']
        log.info('commit = %s', commit)

        started_at = build['started_at']
        log.info('started_at  = %s', started_at)

        finished_at = build['finished_at']
        log.info('finished_at = %s', finished_at)

        duration = build['duration']
        log.info('duration = %s', duration)
        if not isInt(duration):
            raise UnknownError('duration returned is not an integer!')

        repository_id = build['repository_id']
        log.info('repository_id = %s', repository_id)
        if not isInt(repository_id):
            raise UnknownError('repository_id returned is not an integer!')

        result = build['result']
        log.info('result = %s', result)

        state = build['state']
        log.info('state = %s', state)

        if result == 0:
            self.ok()
            status = "PASSED"
        else:
            self.critical()
            status = "FAILED"

        self.msg = "Travis CI build #{number} {status} for repo '{repo}' in {duration} secs".format(\
                               number=number, status=status, repo=self.repo, duration=duration)
        self.check_thresholds(duration)
        self.msg += ", started_at='{0}'".format(started_at)
        self.msg += ", finished_at='{0}'".format(finished_at)

        if self.verbose:
            self.msg += ", message='{0}'".format(message)
            self.msg += ", branch='{0}'".format(branch)
            self.msg += ", commit='{0}'".format(commit)
            self.msg += ", repository_id='{0}'".format(repository_id)

        if self.verbose or self.builds_in_progress > 0:
            self.msg += ", {0} build{1} in progress".format(
                self.builds_in_progress, plural(self.builds_in_progress))
        self.msg += " | last_build_duration={duration}s{perf_thresholds} num_builds_in_progress={builds_in_progress}"\
                    .format(duration=duration,
                            perf_thresholds=self.get_perf_thresholds(),
                            builds_in_progress=self.builds_in_progress)
 def process_options(self):
     self.expected = self.get_opt('expected')
     if self.expected is not None:
         validate_regex(self.expected)
         log.info('expected version regex: %s', self.expected)
 def print_log(self, build=None, job_id=None):
     if job_id:
         self.print_job_log(job_id=job_id)
         log.info('=' * 80)
         log.info('end of log for job id %s', job_id)
         log.info('=' * 80 + '\n')
     else:
         if not build:
             code_error('no job id passed to print_log(), nor build to determine job from')
         log.info('getting job id for build #%s', build['number'])
         if 'jobs' not in build:
              raise UnknownError('no jobs field found in build, {0}'.format(support_msg_api()))
          job = None
          for _ in build['jobs']:
             _id = _['id']
             url = 'https://api.travis-ci.org/jobs/{id}'.format(id=_id)
             req = self.request_handler.get(url)
             # if this raises ValueError it'll be caught by run handler
             job_data = json.loads(req.content)
             if log.isEnabledFor(logging.DEBUG):
                 log.debug("job id %s status:\n%s", _id, jsonpp(job_data))
             if self.failed is True:
                 if job_data['state'] == 'finished' and job_data['status'] in (None, 1, '1'):
                     job = job_data
             else:
                 job = job_data
         if not job:
             raise UnknownError('no job found in build {0}'.format(build['number']))
         self.print_job_log(job=job)
         log.info('=' * 80)
         log.info('end of log for build number #%s job id %s', build['number'], job['id'])
         log.info('=' * 80 + '\n')
    def process_result(self, result):
        _id = result['id']
        log.info('latest build id: %s', _id)

        status = result['status']
        log.info('status: %s', status)
        if not isInt(status, allow_negative=True):
            raise UnknownError(
                'non-integer status returned by DockerHub API. {0}'.format(
                    support_msg_api()))

        tag = result['dockertag_name']
        log.info('tag: %s', tag)

        trigger = result['cause']
        log.info('trigger: %s', trigger)

        created_date = result['created_date']
        log.info('created date: %s', created_date)

        last_updated = result['last_updated']
        log.info('last updated: %s', last_updated)

        created_datetime = datetime.datetime.strptime(
            created_date.split('.')[0], '%Y-%m-%dT%H:%M:%S')
        updated_datetime = datetime.datetime.strptime(
            last_updated.split('.')[0], '%Y-%m-%dT%H:%M:%S')
        build_latency_timedelta = updated_datetime - created_datetime
        build_latency = build_latency_timedelta.total_seconds()
        log.info('build latency (creation to last updated): %s', build_latency)
        # results in .0 floats anyway
        build_latency = int(build_latency)

        build_code = result['build_code']
        build_url = 'https://hub.docker.com/r/{0}/builds/{1}'.format(
            self.repo, build_code)
        log.info('latest build URL: %s', build_url)

        if str(status) in self.statuses:
            status = self.statuses[str(status)]
        else:
            log.warning("status code '%s' not recognized! %s", status,
                        support_msg_api())
            log.warning('defaulting to assume status is an Error')
            status = 'Error'
        if status != 'Success':
            self.critical()
        self.msg += "'{repo}' last completed build status: '{status}', tag: '{tag}', build code: {build_code}"\
                    .format(repo=self.repo, status=status, tag=tag, build_code=build_code)
        if self.verbose:
            self.msg += ', id: {0}'.format(_id)
            self.msg += ', trigger: {0}'.format(trigger)
            self.msg += ', created date: {0}'.format(created_date)
            self.msg += ', last updated: {0}'.format(last_updated)
            self.msg += ', build_latency: {0}'.format(sec2human(build_latency))
            self.msg += ', build URL: {0}'.format(build_url)
        self.msg += ' | build_latency={0:d}s'.format(build_latency)
 def check_http(self, host, port, url_path=''):
     if not isStr(url_path):
         url_path = ''
     url = '{protocol}://{host}:{port}/{url_path}'.format(
         protocol=self.protocol,
         host=host,
         port=port,
         url_path=url_path.lstrip('/'))
     log.info('GET %s', url)
     try:
         # timeout here isn't total timeout, it's response time
         req = requests.get(url, timeout=self.request_timeout)
     except requests.exceptions.RequestException as _:
         log.info('%s - returned exception: %s', url, _)
         return False
     except IOError as _:
         log.info('%s - returned IOError: %s', url, _)
         return False
     log.debug("%s - response: %s %s", url, req.status_code, req.reason)
     log.debug("%s - content:\n%s\n%s\n%s", url, '=' * 80,
               req.content.strip(), '=' * 80)
     if req.status_code != 200:
         log.info('%s - status code %s != 200', url, req.status_code)
         return None
     if self.regex:
         log.info('%s - checking regex against content', url)
         # if this ends up not being processed properly and remains a string instead
         # of the expected compiled regex, then .search() will hang
         if isStr(self.regex):
             die('string found instead of expected compiled regex!')
         if self.regex.search(req.content):
             log.info('%s - regex matched http output', url)
         else:
             log.info('%s - regex did not match http output', url)
             return None
     log.info("%s - passed all checks", url)
     return (host, port)
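As the comment in check_http() notes, the requests timeout bounds each socket operation, not the whole request. A hedged sketch of one way to also enforce a total wall-clock budget (the helper name and deadline value are illustrative, not part of the plugin):

    import time
    import requests

    def get_with_deadline(url, deadline_secs=10):
        start = time.time()
        req = requests.get(url, timeout=deadline_secs)
        # a slowly streaming body can pass the per-read timeout yet blow the budget
        if time.time() - start > deadline_secs:
            raise requests.exceptions.Timeout('total deadline exceeded for %s' % url)
        return req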
Example #38
 def run(self):
     start = time.time()
     log.info('subscribing')
     self.subscribe()
     log.info('publishing message "%s"', self.publish_message)
     start_publish = time.time()
     self.publish()
     stop_publish = time.time()
     self._publish_time = round(stop_publish - start_publish,
                                self._precision)
     log.info('published in %s secs', self._publish_time)
     if self.sleep_secs:
         log.info('sleeping for %s secs', self.sleep_secs)
         time.sleep(self.sleep_secs)
     start_consume = time.time()
     log.info('consuming message')
     self._consumed_message = self.consume()
     stop_consume = time.time()
     self._consume_time = round(stop_consume - start_consume,
                                self._precision)
     log.info('consumed in %s secs', self._consume_time)
     log.info('consumed message = "%s"', self._consumed_message)
     # resetting to ok is bad - would break inheritance logic
     #self.ok()
     stop = time.time()
     self._total_time = round(stop - start, self._precision)
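The run() above repeats the same stopwatch pattern for publish, consume and total time. A minimal generic sketch of that pattern, assuming only the standard library (timed() is illustrative; the precision of 3 mirrors the rounding above):

    import time

    def timed(func, *args, **kwargs):
        # returns the callable's result plus elapsed wall-clock seconds
        start = time.time()
        result = func(*args, **kwargs)
        return result, round(time.time() - start, 3)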
Example #39
 def get_table_conn(self):
     log.info('checking table \'%s\'', self.table)
     if not self.conn.is_table_enabled(self.table):
         qquit('CRITICAL', "table '{0}' is not enabled!".format(self.table))
     table_conn = self.conn.table(self.table)
     return table_conn
Example #40
    def run(self):
        csv_file = self.get_opt('csv')
        parquet_dir = self.get_opt('parquet_dir')
        has_header = self.get_opt('has_header')
        # I don't know why the Spark guys made this a string instead of a bool
        header_str = 'false'
        if has_header:
            header_str = 'true'
        schema = self.get_opt('schema')
        # let Spark fail if csv/parquet aren't available
        # can't check paths exist as want to remain generically portable
        # to HDFS, local filesystem or any other URI scheme Spark supports
        log.info("CSV Source: %s" % csv_file)
        log.info("Parquet Destination: %s" % parquet_dir)

        if schema:
            def get_type(arg):
                arg = str(arg).lower()
                if arg not in self.types_mapping:
                    self.usage("invalid type '%s' defined in --schema, must be one of: %s"
                               % (arg, ', '.join(sorted(self.types_mapping.keys()))))
                # return self.types_mapping[arg]
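                # dynamic import of pyspark.sql.types; note level -1 (implicit relative
                # imports) is Python 2 only - Python 3 would use importlib.import_module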
                module = __import__('pyspark.sql.types', globals(), locals(), ['types'], -1)
                class_ = getattr(module, self.types_mapping[arg])
                _ = class_()
                return _

            def create_struct(arg):
                name = arg
                data_type = 'string'
                if ':' in arg:
                    (name, data_type) = arg.split(':', 1)
                data_class = get_type(data_type)
                return StructField(name, data_class, True)
            # see https://github.com/databricks/spark-csv#python-api
            self.schema = StructType([create_struct(_) for _ in schema.split(',')])
            log.info('generated CSV => Spark schema')

        conf = SparkConf().setAppName('HS PySpark CSV => Parquet')
        sc = SparkContext(conf=conf) # pylint: disable=invalid-name
        sqlContext = SQLContext(sc)  # pylint: disable=invalid-name
        spark_version = sc.version
        log.info('Spark version detected as %s' % spark_version)

        if not isVersionLax(spark_version):
            die("Spark version couldn't be determined. " + support_msg('pytools'))

        # pylint: disable=invalid-name

        df = None
        if isMinVersion(spark_version, 1.4):
            if has_header and not schema:
                log.info('inferring schema from CSV headers')
                df = sqlContext.read.format('com.databricks.spark.csv')\
                     .options(header=header_str, inferschema='true')\
                     .load(csv_file)
            else:
                log.info('using explicitly defined schema')
                df = sqlContext.read\
                     .format('com.databricks.spark.csv')\
                     .options(header=header_str)\
                     .load(csv_file, schema=self.schema)
            df.write.parquet(parquet_dir)
        else:
            log.warning('running legacy code for Spark <= 1.3')
            if has_header and not schema:
                log.info('inferring schema from CSV headers')
                df = sqlContext.load(source="com.databricks.spark.csv", path=csv_file,
                                     header=header_str, inferSchema='true')
            elif self.schema:
                log.info('using explicitly defined schema')
                schema = self.schema
                df = sqlContext.load(source="com.databricks.spark.csv", path=csv_file,
                                     header=header_str, schema=schema)
            else:
                die('no header and no schema, caught late')
            df.saveAsParquetFile(parquet_dir)
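For reference, a hedged sketch of the StructType the --schema handling above would build for an input like 'name,age:integer,salary:double', assuming types_mapping resolves those names to the usual pyspark.sql.types classes:

    from pyspark.sql.types import (StructType, StructField,
                                   StringType, IntegerType, DoubleType)

    schema = StructType([
        StructField('name', StringType(), True),     # no ':type' suffix defaults to string
        StructField('age', IntegerType(), True),
        StructField('salary', DoubleType(), True),
    ])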
Example #41
 def launch_job(self):
     log.info('triggering debug job {job_id}'.format(job_id=self.job_id))
     url = 'https://api.travis-ci.org/job/{job_id}/debug'.format(job_id=self.job_id)
     self.request_handler.check_response_code = self.check_job_launch_response_code
     self.request_handler.post(url, headers=self.headers)
Example #42
    def run(self):
        self.no_args()
        host = self.get_opt('host')
        port = self.get_opt('port')
        user = self.get_opt('user')
        password = self.get_opt('password')
        if self.get_opt('ssl'):
            self.protocol = 'https'
        history_mins = self.get_opt('history_mins')
        num = self.get_opt('num')
        #inventory_id = self.get_opt('id')
        source = self.get_opt('source')
        dest = self.get_opt('dest')
        max_age = self.get_opt('max_age')
        max_runtime = self.get_opt('max_runtime')
        validate_host(host)
        validate_port(port)
        validate_user(user)
        validate_password(password)
        validate_float(history_mins, 'history mins')
        self.history_mins = float(history_mins)
        filter_opts = {}
        if self.history_mins:
            now = datetime.now()
            filter_opts['dateRangeStart'] = datetime.strftime(
                now - timedelta(minutes=self.history_mins), '%F %H:%M:%S')
            filter_opts['dateRangeEnd'] = datetime.strftime(now, '%F %H:%M:%S')
        if num is not None:
            validate_int(num, 'num ingestions', 1)
        #if inventory_id is not None:
        #    validate_chars(inventory_id, 'ingestion id', r'\w-')
        #    filter_opts['inventoryId'] = inventory_id
        if source is not None:
            log_option('source', source)
            filter_opts['fileName'] = source
        if dest is not None:
            log_option('dest', dest)
            filter_opts['destinationPath'] = dest
        if max_age is not None:
            validate_float(max_age, 'max age', 1)
            max_age = float(max_age)
        if max_runtime is not None:
            validate_float(max_runtime, 'max incomplete runtime', 1)
            max_runtime = float(max_runtime)

        self.url_base = '{protocol}://{host}:{port}/bedrock-app/services/rest'.format(
            host=host, port=port, protocol=self.protocol)
        # auth first, get JSESSIONID cookie
        # cookie jar doesn't work in Python or curl, must extract JSESSIONID to header manually
        #self.jar = cookielib.CookieJar()
        log.info('authenticating to Zaloni Bedrock')
        (_, self.auth_time) = self.req(
            url='{url_base}/admin/getUserRole'.format(url_base=self.url_base),
            # using json instead of constructing string manually,
            # this correctly escapes backslashes in password
            body=json.dumps({
                "username": user,
                "password": password
            }))
        if self.get_opt('list'):
            self.list_ingestions(num=num)

        self.check_ingestion(num=num,
                             filter_opts=filter_opts,
                             max_age=max_age,
                             max_runtime=max_runtime)
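The rolling time window passed in filter_opts above can be reproduced standalone. A minimal sketch, assuming history_mins=1440 for a 24 hour window ('%F' is strftime shorthand for '%Y-%m-%d', as used above):

    from datetime import datetime, timedelta

    history_mins = 1440.0
    now = datetime.now()
    filter_opts = {
        'dateRangeStart': datetime.strftime(now - timedelta(minutes=history_mins),
                                            '%F %H:%M:%S'),
        'dateRangeEnd': datetime.strftime(now, '%F %H:%M:%S'),
    }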
Example #43
 def get_ingestions(self, num=None, filter_opts=None):
     log.info('getting ingestion history')
     if num:
         chunk_size = num
         log.info('explicit number of results requested: %s', chunk_size)
     elif filter_opts:
         chunk_size = 10
         log.info('filters detected, defaulting number of results to %s',
                  chunk_size)
     else:
         chunk_size = 100
         log.info('using catch all default result limit of %s', chunk_size)
     settings = {'chunkSize': chunk_size, 'currentPage': 1}
     if filter_opts is not None:
         if not isDict(filter_opts):
             code_error(
                 'passed non-dictionary for filter opts to get_ingestions')
         for key, value in sorted(filter_opts.items()):
             log.info("filter: '%s' = '%s'", key, value)
         settings = merge_dicts(settings, filter_opts)
     log.info('settings: %s', settings)
     log.info('querying Zaloni for ingestion history')
     (req, self.query_time) = self.req(
         url='{url_base}/ingestion/publish/getFileIndex'.format(
             url_base=self.url_base),
         # orders by newest first, but seems to return last 10 anyway
         body=json.dumps(settings))
     try:
         log.info('parsing JSON response')
         json_dict = json.loads(req.content)
     except ValueError as _:
         qquit('UNKNOWN',
               'error parsing json returned by Zaloni: {0}'.format(_))
     return json_dict
Example #44
    def run(self):
        self.no_args()
        host = self.get_opt('host')
        port = self.get_opt('port')
        user = self.get_opt('user')
        password = self.get_opt('password')
        self._all = self.get_opt('all')
        workflow_id = self.get_opt('id')
        workflow_name = self.get_opt('name')
        max_age = self.get_opt('max_age')
        max_runtime = self.get_opt('max_runtime')
        if self.get_opt('ssl'):
            self.protocol = 'https'
        validate_host(host)
        validate_port(port)
        validate_user(user)
        validate_password(password)
        if workflow_id is not None:
            if workflow_name is not None:
                self.usage(
                    'cannot specify both --id and --name simultaneously')
            validate_int(workflow_id, 'workflow id', 1)
            workflow_id = int(workflow_id)
        elif workflow_name is not None:
            validate_chars(workflow_name, 'workflow name', r'\w\s-')
        elif self.get_opt('list'):
            pass
        else:
            self.usage(
                'must specify one of --name / --id / --all or use --list to find workflow names/IDs to specify'
            )
        if max_age is not None:
            validate_float(max_age, 'max age', 1)
            max_age = float(max_age)
        if max_runtime is not None:
            validate_float(max_runtime, 'max runtime', 1)
            max_runtime = float(max_runtime)

        self.url_base = '{protocol}://{host}:{port}/bedrock-app/services/rest'.format(
            host=host, port=port, protocol=self.protocol)
        # auth first, get JSESSIONID cookie
        # cookie jar doesn't work in Python or curl, must extract JSESSIONID to header manually
        #self.jar = cookielib.CookieJar()
        log.info('authenticating to Zaloni Bedrock')
        (_, self.auth_time) = self.req(
            url='{url_base}/admin/getUserRole'.format(url_base=self.url_base),
            # using json instead of constructing string manually,
            # this correctly escapes backslashes in password
            body=json.dumps({
                "username": user,
                "password": password
            }))
        # alternative method
        #session = requests.Session()
        #req = self.req(session,
        #               url='http://%(host)s:%(port)s/bedrock-app/services/rest/%(user)s/getUserRole' % locals(),
        #               method='POST')

        if self.get_opt('list'):
            self.list_workflows()

        if self._all:
            workflows = self.get_workflows()
            if not workflows:
                qquit('UNKNOWN', 'no workflows found')
            results = {}
            try:
                for workflow in workflows:
                    result = self.check_workflow(workflow['wfName'], None)
                    if result is None:
                        results['No Runs'] = results.get('No Runs', 0)
                        results['No Runs'] += 1
                        continue
                    results[result] = results.get(result, 0)
                    results[result] += 1
                self.msg = 'Zaloni workflows: '
                for result in results:
                    self.msg += "'{0}' = {1}, ".format(result, results[result])
                self.msg = self.msg.rstrip(', ')
            except KeyError as _:
                qquit(
                    'UNKNOWN',
                    'parsing workflows for --all failed: {0}. '.format(_) +
                    support_msg_api())
        else:
            self.check_workflow(workflow_name, workflow_id, max_age,
                                max_runtime)
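The tallying loop in the --all branch above can be written more compactly with collections.Counter, which makes the get()/increment pair unnecessary. A hedged sketch (the statuses below are made up, not Zaloni's):

    from collections import Counter

    results = Counter()
    for status in ('SUCCESS', 'FAILED', 'SUCCESS', None):
        results['No Runs' if status is None else status] += 1
    msg = 'Zaloni workflows: ' + ', '.join(
        "'{0}' = {1}".format(k, v) for k, v in results.items())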
Example #45
 def check_channel(self):
     log.info('checking channel is still open')
     if not self.channel.is_open:
         raise CriticalError('channel closed')
Example #46
 def disable_timeout(self):
     log.info('disabling timeout')
     self.timeout = 0
     signal.alarm(0)
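disable_timeout() above is the disarming half of a SIGALRM-based self-timeout. A minimal sketch of the arming side (Unix only; the handler and the 30 second value are illustrative):

    import signal

    def timeout_handler(signum, frame):
        raise Exception('self timed out')

    signal.signal(signal.SIGALRM, timeout_handler)
    signal.alarm(30)   # fire SIGALRM in 30 secs unless signal.alarm(0) disarms it first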
Example #47
 def check_connection(self):
     log.info('checking connection is still open')
     if not self.conn.is_open:
         raise CriticalError('connection closed')
Example #48
 def subscribe(self):
     credentials = pika.credentials.PlainCredentials(
         self.user, self.password)
     parameters = pika.ConnectionParameters(
         host=self.host,
         port=self.port,
         virtual_host=self.vhost,
         credentials=credentials,
         heartbeat_interval=1,
         ssl=self.ssl,
         connection_attempts=self.default_conn_attempts,
         retry_delay=self.retry_delay,
         backpressure_detection=True,
         # socket_timeout – Use for high latency networks
     )
     self.conn = pika.BlockingConnection(parameters=parameters)
     log.debug('adding blocked connection callback')
     self.conn.add_on_connection_blocked_callback(
         self.connection_blocked_callback)
      log.debug(
          'adding connection timeout of one third of total timeout (%.2f out of %.2f secs)',
          self.timeout / 3, self.timeout)
     # no args to this callback
     self.conn.add_timeout(self.timeout / 3,
                           self.connection_timeout_handler)
     self.check_connection()
     log.info('requesting channel')
     self.channel = self.conn.channel()
     log.info('got channel number %s', self.channel.channel_number)
     log.debug('adding channel cancel callback')
     self.channel.add_on_cancel_callback(self.connection_cancel_callback)
     # newer versions of RabbitMQ won't use this but will instead use TCP backpressure
     # not available on BlockingChannel
     #self.channel.add_on_flow_callback(self.on_flow_callback)
     log.debug('adding return callback')
     # not available on BlockingChannel
     #self.channel.add_on_return_callback(self.connection_return_callback)
     if self.use_transactions:
         log.info('setting channel to use AMQP transactions')
         self.channel.tx_select()
     else:
         log.info('setting RabbitMQ specific channel confirmation')
         # different in BlockingChannel
         #self.channel.confirm_delivery(callback=self.confirm_delivery_callback, nowait=False)
         self.channel.confirm_delivery()
     self.check_channel()
      if self.queue:
          log.info('declaring queue \'%s\'', self.queue)
         result = self.channel.queue_declare(queue=self.queue,
                                             durable=self.durable)
         if self.queue != result.method.queue:
             raise UnknownError("queue returned in subscribe ('{queue_returned}') "\
                                .format(queue_returned=result.method.queue) + \
                                "did not match requested queue name ('{queue}')"\
                                .format(queue=self.queue))
     else:
          # auto-generate a unique queue; durable flag is ignored for exclusive
          result = self.channel.queue_declare(exclusive=True)
          self.queue = result.method.queue
          self.routing_key = self.queue
          log.info('was assigned unique exclusive queue: %s', self.queue)
     if self.exchange:
         log.info("declaring exchange: '%s', type: '%s'", self.exchange,
                  self.exchange_type)
         self.channel.exchange_declare(exchange=self.exchange,
                                       type=self.exchange_type)
         # if using nameless exchange this isn't necessary as routing key will send to queue
         log.info("binding queue '%s' to exchange '%s'", self.queue,
                  self.exchange)
         self.channel.queue_bind(exchange=self.exchange, queue=self.queue)
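For context, a minimal publish-side counterpart to the subscribe() above, sketched against the same older pika BlockingConnection API (host, credentials and queue name here are illustrative defaults):

    import pika

    credentials = pika.credentials.PlainCredentials('guest', 'guest')
    conn = pika.BlockingConnection(pika.ConnectionParameters(
        host='localhost', port=5672, credentials=credentials))
    channel = conn.channel()
    channel.queue_declare(queue='test', durable=False)
    # the nameless exchange routes directly to the queue named by routing_key
    channel.basic_publish(exchange='', routing_key='test', body='hello')
    conn.close()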