def check_path(self, path):
    if os.path.isfile(path):
        # files given explicitly are checked regardless
        #if self.regex and self.regex.search(path):
        self.check_media_file(path)
    elif os.path.isdir(path):
        listing = []
        try:
            listing = os.listdir(path)
            listing = [x for x in listing if x[0] != '.']
        except OSError as _:
            if self.skip_errors:
                print(_)
                self.failed = True
            else:
                die(_)
        for item in listing:
            subpath = os.path.join(path, item)
            if os.path.isdir(subpath):
                self.check_path(subpath)
            elif self.regex:
                if self.regex.search(item):
                    self.check_media_file(subpath)
            elif self.re_media_suffix.match(item):
                self.check_media_file(subpath)
    else:
        die("failed to determine if path '%s' is file or directory" % path)

def check_multirecord_json(self):
    log.debug('check_multirecord_json()')
    for line in self.iostream:
        if isJson(line):
            # can't use self.print() here, don't want to print valid for every line of a file / stdin
            if self.passthru:
                print(line, end='')
        elif isJson(line.replace("'", '"')):
            if self.permit_single_quotes:
                log.debug('valid multirecord json (single quoted)')
                #self.single_quotes_detected = True
                if self.passthru:
                    print(line, end='')
            else:
                log.debug('invalid multirecord json (single quoted)')
                self.failed = True
                if not self.passthru:
                    die('%s (multi-record format)' % self.invalid_json_msg_single_quotes)
                return False
        else:
            log.debug('invalid multirecord json')
            self.failed = True
            return False
    #self.multi_record_detected = True
    log.debug('multirecord json (all lines passed)')
    if not self.passthru:
        print('%s (multi-record format)' % self.valid_json_msg)
    return True

def run(self):
    self.no_args()
    json_file = self.options.json
    avro_dir = self.options.avro_dir
    # let Spark fail if json/avro dir aren't available
    # can't check paths exist as want to remain generically portable
    # to HDFS, local filesystem or any other uri scheme Spark supports
    log.info("Json Source: %s" % json_file)
    log.info("Avro Destination: %s" % avro_dir)
    conf = SparkConf().setAppName('HS PySpark Json => Avro')
    sc = SparkContext(conf=conf)  # pylint: disable=invalid-name
    sqlContext = SQLContext(sc)  # pylint: disable=invalid-name
    spark_version = sc.version
    log.info('Spark version detected as %s' % spark_version)
    if not isVersionLax(spark_version):
        die("Spark version couldn't be determined. " + support_msg('pytools'))
    # pylint: disable=invalid-name
    df = None
    if isMinVersion(spark_version, 1.4):
        df = sqlContext.read.json(json_file)
    else:
        die('Spark <= 1.3 is not supported due to avro dependency, sorry! ' + \
            'I may change this on request but prefer people just upgrade')
        #log.warn('running legacy code for Spark <= 1.3')
        #json = sqlContext.jsonFile(json_file)
    # this doesn't work in Spark <= 1.3 and the github docs don't mention the older methods for writing avro using
    # the databricks avro driver
    df.write.format('com.databricks.spark.avro').save(avro_dir)

def check_media_file(self, filename):
    valid_media_msg = '%s => OK' % filename
    invalid_media_msg = '%s => INVALID' % filename
    try:
        #cmd = self.validate_cmd.format(filename)
        cmd = self.validate_cmd
        log.debug('cmd: %s %s', cmd, filename)
        log.info('verifying {0}'.format(filename))
        # capturing stderr to stdout because ffprobe prints to stderr in all cases
        # Python 2.7+
        #subprocess.check_output(cmd.split() + [filename], stderr=subprocess.STDOUT)
        proc = subprocess.Popen(cmd.split() + [filename], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        (stdout, _) = proc.communicate()
        returncode = proc.wait()
        if returncode != 0 or (stdout is not None and 'Error' in stdout):
            _ = CalledProcessError(returncode, cmd)
            _.output = stdout
            raise _
        print(valid_media_msg)
    except CalledProcessError as _:
        if self.verbose > 2:
            print(_.output)
        if self.skip_errors:
            print(invalid_media_msg)
            self.failed = True
            return False
        die(invalid_media_msg)

def connection(self, host, port, user, password, ssl=False, **kwargs):
    # must set X-Requested-By in newer versions of Ambari
    self.x_requested_by = user
    if user == 'admin':
        self.x_requested_by = os.getenv('USER', user)
    #log.info("contacting Ambari as '%s'" % self.user)
    if not isHost(host) or not isPort(port) or not isUser(user) or not password:
        raise InvalidOptionException('invalid options passed to AmbariBlueprint()')
    proto = 'http'  # pylint: disable=unused-variable
    if ssl:
        proto = 'https'
    self.host = host
    self.port = port
    self.user = user
    self.password = password
    #if kwargs.has_key('strip_config') and kwargs['strip_config']:
    if 'strip_config' in kwargs and kwargs['strip_config']:
        self.strip_config = True
    self.url_base = '%(proto)s://%(host)s:%(port)s/api/v1' % locals()
    if 'dir' in kwargs and kwargs['dir']:
        self.blueprint_dir = kwargs['dir']
    if not isDirname(self.blueprint_dir):
        qquit('UNKNOWN', 'invalid dir arg passed to AmbariBlueprintTool')
    try:
        if not self.blueprint_dir or not os.path.exists(self.blueprint_dir):
            log.info("creating blueprint data dir '%s'" % self.blueprint_dir)
            os.mkdir(self.blueprint_dir)
        if not os.path.isdir(self.blueprint_dir):
            raise IOError("blueprint dir '%s' already taken and is not a directory" % self.blueprint_dir)
    except IOError as _:
        die("failed to create dir '%s': %s" % (self.blueprint_dir, _))

def check_path(self, path):
    # os.walk returns nothing if path is a file, and must store file names, sizes, checksums and regex captures
    # even for standalone file args
    if os.path.isfile(path):
        self.check_file(path)
    elif os.path.isdir(path):
        # returns generator
        # root is the dir, dirs and files are child basenames
        for root, dirs, files in os.walk(path):
            #log.debug('root = %s', root)
            #log.debug('files = %s', files)
            # do not check hidden subdirs
            if not self.include_dot_dirs:
                # results in 'IndexError: string index out of range' if suffixed with '/'
                #if os.path.basename(root)[0] == '.':
                #    continue
                # could regex strip all suffixed '/' but it's cheaper to just modify the dirs list in place
                dirs[:] = [d for d in dirs if d[0] != '.']
            for filebasename in files:
                filepath = os.path.join(root, filebasename)
                try:
                    self.is_file_dup(filepath)
                except OSError as exc:
                    log.error("error while checking file '{0}': {1}".format(filepath, exc))
                    self.failed = True
    else:
        die("'%s' is not a file or directory" % path)

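# Standalone illustration of the in-place pruning trick used above: slice
# assignment to dirs[:] mutates the very list os.walk() is iterating over, so
# the walker itself skips the pruned subtrees; rebinding with 'dirs = [...]'
# would only change the local name and hidden dirs would still be descended.
# Minimal sketch, not part of the class above:

import os

def walk_skipping_hidden(top):
    for root, dirs, files in os.walk(top):
        dirs[:] = [d for d in dirs if d[0] != '.']  # prune hidden dirs in place
        for name in files:
            yield os.path.join(root, name)
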
def run(self):
    self.num_lines = self.options.num
    vlog_option('number of lines', self.num_lines)
    self.quiet = self.options.quiet
    vlog_option('quiet', self.quiet)
    if not self.args:
        self.args.append('-')
    for arg in self.args:
        if arg == '-':
            continue
        if not os.path.exists(arg):
            print("'%s' not found" % arg)
            sys.exit(ERRORS['WARNING'])
        if os.path.isfile(arg):
            vlog_option('file', arg)
        elif os.path.isdir(arg):
            vlog_option('directory', arg)
        else:
            die("path '%s' could not be determined as either a file or directory" % arg)
    for filename in self.args:
        if filename == '-':
            self.headtail(sys.stdin.read())
        else:
            with open(filename) as _:
                self.headtail(_.read())
        if not self.quiet and len(self.args) > 1:
            print(self.docsep)

def check_path(self, path, branch):
    status = True
    (branch_base, _) = self.branch_version(branch)
    if os.path.isfile(path):
        return self.check_file(path, branch)
    elif os.path.isdir(path):
        if os.path.basename(path) == '.git':
            return True
        for item in os.listdir(path):
            subpath = os.path.join(path, item)
            if os.path.islink(subpath):
                subpath = os.path.realpath(subpath)
            if os.path.isdir(subpath):
                subpath_base = os.path.basename(subpath)
                #log.debug('subpath_base = %s', subpath_base)
                if self.normalize_name(subpath_base) == self.normalize_name(branch_base):
                    if not self.check_path(subpath, branch):
                        status = False
            elif os.path.isfile(subpath):
                if not self.check_file(subpath, branch):
                    status = False
            elif not os.path.exists(subpath):
                log.debug("subpath '%s' does not exist in branch '%s', skipping..." % (subpath, branch))
            else:
                die("failed to determine if subpath '%s' is file or directory in branch '%s'" % (subpath, branch))
    elif not os.path.exists(path):
        log.debug("path '%s' does not exist in branch '%s', skipping..." % (path, branch))
    else:
        die("failed to determine if path '%s' is file or directory in branch '%s'" % (path, branch))
    return status

def check_path(self, path, tag):
    status = True
    if os.path.isfile(path):
        return self.check_file(path, tag)
    elif os.path.isdir(path):
        if os.path.basename(path) == ".git":
            return True
        for item in os.listdir(path):
            subpath = os.path.join(path, item)
            if os.path.islink(subpath):
                subpath = os.path.realpath(subpath)
            if os.path.isdir(subpath):
                tag_base = tag.rsplit("-", 1)[0]
                subpath_base = os.path.basename(subpath)
                #log.debug('tag_base = %s', tag_base)
                #log.debug('subpath_base = %s', subpath_base)
                if subpath_base == tag_base:
                    if not self.check_path(subpath, tag):
                        status = False
            elif os.path.isfile(subpath):
                if not self.check_file(subpath, tag):
                    status = False
            elif not os.path.exists(subpath):
                log.debug("subpath '%s' does not exist in tag '%s', skipping..." % (subpath, tag))
            else:
                die("failed to determine if subpath '%s' is file or directory in tag '%s'" % (subpath, tag))
    elif not os.path.exists(path):
        log.debug("path '%s' does not exist in tag '%s', skipping..." % (path, tag))
    else:
        die("failed to determine if path '%s' is file or directory in tag '%s'" % (path, tag))
    return status

def check_file(self, filename, tag): filename = os.path.abspath(filename) if os.path.basename(filename) != "Dockerfile": return True parent = os.path.basename(os.path.dirname(filename)) tag_base = tag.rsplit("-", 1)[0] if parent.lower() != tag_base.lower(): log.debug( "skipping '{0}' as it's parent directory '{1}' doesn't match tag base '{2}'".format( filename, parent, tag_base ) ) return True self.valid_git_tags_msg = "%s => Dockerfile Git Tags OK" % filename self.invalid_git_tags_msg = "%s => Dockerfile Git Tags MISMATCH in tag '%s'" % (filename, tag) try: if not self.check_dockerfile_arg(filename, tag): self.failed = True # print(self.invalid_git_tags_msg) return False # now switched to per tag scan this returns way too much redundant output # print(self.valid_git_tags_msg) except IOError as _: die("ERROR: %s" % _) return True
def check_git_branches_dockerfiles(self, target): gitroot = find_git_root(target) if gitroot is None: die('Failed to find git root for target {0}'.format(target)) log.debug("finding branches for target '{0}'".format(target)) repo = git.Repo(gitroot) branches = [str(x) for x in repo.refs if isinstance(x, git.refs.remote.RemoteReference)] branches = [x.split('/')[-1] for x in branches] branches = [x for x in branches if x not in ('HEAD', 'master')] if self.branch_prefix is not None: log.debug('restricting to branches matching branch prefix') branches = [x for x in branches if self.branch_prefix.match(x)] #if log.isEnabledFor(logging.DEBUG): log.debug('\n\nbranches for target %s:\n\n%s\n', target, '\n'.join(branches)) original_checkout = 'master' try: try: original_checkout = repo.active_branch.name except TypeError as _: pass for branch in branches: log.debug("checking branch '%s' Dockerfiles for target '%s'", branch, target) self.branches_checked += 1 try: repo.git.checkout(branch) except git.exc.GitCommandError as _: die(_) self.check_path(target, branch) except Exception as _: # pylint: disable=broad-except traceback.print_exc() sys.exit(1) finally: log.debug("returning to original checkout '%s'", original_checkout) repo.git.checkout(original_checkout)
def run(self):
    parquet_file = self.get_opt('parquet')
    avro_dir = self.get_opt('avro_dir')
    # let Spark fail if avro/parquet aren't available
    # can't check paths exist as want to remain generically portable
    # to HDFS, local filesystem or any other uri scheme Spark supports
    log.info("Parquet Source: %s" % parquet_file)
    log.info("Avro Destination: %s" % avro_dir)
    conf = SparkConf().setAppName('HS PySpark Parquet => Avro')
    sc = SparkContext(conf=conf)  # pylint: disable=invalid-name
    sqlContext = SQLContext(sc)  # pylint: disable=invalid-name
    spark_version = sc.version
    log.info('Spark version detected as %s' % spark_version)
    if not isVersionLax(spark_version):
        die("Spark version couldn't be determined. " + support_msg('pytools'))
    # pylint: disable=invalid-name
    if isMinVersion(spark_version, 1.4):
        # this doesn't work in Spark <= 1.3 - github docs don't mention the older .method() for writing avro
        df = sqlContext.read.parquet(parquet_file)
        df.write.format('com.databricks.spark.avro').save(avro_dir)
    else:
        die('Spark <= 1.3 is not supported due to avro dependency, sorry! ' + \
            'I may change this on request but prefer people just upgrade')

def print_table_regions(self):
    print('=' * self.total_width)
    print('{0:{1}}{2}'.format(self.region_header, self.region_width, self.separator), end='')
    print('{0:{1}}{2}'.format(self.start_key_header, self.start_key_width, self.separator), end='')
    print('{0:{1}}{2}'.format(self.end_key_header, self.end_key_width, self.separator), end='')
    print('{0}'.format(self.server_header))
    print('=' * self.total_width)
    try:
        for region in self._regions:
            print('{0:{1}}{2}'.format(self.bytes_to_str(self.shorten_region_name(region['name'])),
                                      self.region_width, self.separator), end='')
            print('{0:{1}}{2}'.format(self.bytes_to_str(region['start_key']),
                                      self.start_key_width, self.separator), end='')
            print('{0:{1}}{2}'.format(self.bytes_to_str(region['end_key']),
                                      self.end_key_width, self.separator), end='')
            print('{0}:{1}'.format(region['server_name'], region['port']))
    except KeyError as _:
        die('error parsing region info: {0}. '.format(_) + support_msg_api())
    print('\nNumber of Regions: {0:d}'.format(len(self._regions)))

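# Illustration of the nested replacement fields print_table_regions() relies on:
# in '{0:{1}}', the inner {1} is resolved first and becomes the format spec, so
# each column is padded to a width computed at runtime. Minimal sketch with
# made-up cell data, not part of the class above:

def _print_aligned_row(cells_and_widths, separator='  '):
    print(separator.join('{0:{1}}'.format(cell, width) for (cell, width) in cells_and_widths))

#_print_aligned_row([('region1', 20), ('rowkey0001', 14), ('server1:16020', 15)])
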
def check_version(self, filename, branch, branch_base, arg_var, found_version, branch_version):
    self.branches_dockerfile_checked.add(branch)
    self.dockerfiles_checked.add(filename)
    if arg_var:
        log.debug("found arg '%s'", arg_var)
        arg_version = "ARG '{0}={1}'".format(arg_var, found_version)
    else:
        arg_version = "'{0}'".format(found_version)
    #log.debug("arg '%s' matches branch base '%s'", argversion.group(1), branch_base)
    log.debug("comparing '%s' contents to version derived from branch '%s' => '%s'",
              filename, branch, branch_version)
    if not isVersion(branch_version.lstrip('jdk').lstrip('jre')):
        die("unrecognized branch version '{0}' for branch_base '{1}'"
            .format(branch_version, branch_base))
    #if branch_version == found_version or branch_version == found_version.split('.', 2)[0]:
    if found_version[0:len(branch_version)] == branch_version:
        log.info("{0} version '{1}' matches {2}".
                 format(self.valid_git_branches_msg, branch_version, arg_version))
    else:
        log.error("{0} version '{1}' vs Dockerfile {2}".
                  format(self.invalid_git_branches_msg, branch_version, arg_version))
        self.dockerfiles_failed += 1
        self.branches_failed.add(branch)
        return False
    return True

def run(self):
    # might have to use compat / transport / protocol args for older versions of HBase or if protocol has been
    # configured to be non-default, see:
    # http://happybase.readthedocs.io/en/stable/api.html#connection
    try:
        log.info('connecting to HBase Thrift Server at {0}:{1}'.format(self.host, self.port))
        self.conn = happybase.Connection(host=self.host, port=self.port, timeout=10 * 1000)  # ms
        tables = self.get_tables()
        # of course there is a minor race condition here between getting the table list, checking and creating
        # not sure if it's solvable, if you have any idea of how to solve it please let me know, even locking
        # would only protect against multiple runs of this script on the same machine...
        if self.table in tables:
            if self.drop_table:
                log.info("table '%s' already existed but -d / --drop-table was specified, removing table first",
                         self.table)
                self.conn.delete_table(self.table, disable=True)
                # wait up to 30 secs for table to be deleted
                #for _ in range(30):
                #    if self.table not in self.get_tables():
                #        break
                #    log.debug('waiting for table to be deleted before creating new one')
                #    time.sleep(1)
            elif self.use_existing_table:
                pass
            else:
                die("WARNING: table '{0}' already exists, will not send data to a pre-existing table for safety"\
                    .format(self.table) + ". You can choose to either --drop-table or --use-existing-table")
        if not self.use_existing_table:
            self.create_table()
        self.populate_table()
        log.info('finished, closing connection')
        self.conn.close()
    except (socket.timeout, ThriftException, HBaseIOError) as _:
        die('ERROR: {0}'.format(_))

def check_git_tags_dockerfiles(self, target):
    target = os.path.abspath(target)
    gitroot = find_git_root(target)
    log.debug("finding tags for target '{0}'".format(target))
    repo = git.Repo(gitroot)
    tags = [str(x).split("/")[-1] for x in repo.tags]
    if self.tag_prefix is not None:
        log.debug("restricting to tags matching tag prefix")
        tags = [x for x in tags if self.tag_prefix.match(x)]
    #if log.isEnabledFor(logging.DEBUG):
    log.debug("\n\ntags for target %s:\n\n%s\n", target, "\n".join(tags))
    original_checkout = "master"
    try:
        try:
            original_checkout = repo.active_branch.name
        except TypeError as _:
            pass
        for tag in tags:
            log.debug("checking tag '%s' Dockerfiles for target '%s'", tag, target)
            try:
                repo.git.checkout(tag)
            except git.exc.GitCommandError as _:
                die(_)
            self.check_path(target, tag)
    except Exception as _:  # pylint: disable=broad-except
        die(_)
    finally:
        log.debug("returning to original checkout '%s'", original_checkout)
        repo.git.checkout(original_checkout)

def get_tables(self):
    try:
        return self.conn.tables()
    except socket.timeout as _:
        die('ERROR while trying to get table list: {0}'.format(_))
    except thrift.transport.TTransport.TTransportException as _:
        die('ERROR while trying to get table list: {0}'.format(_))

def run(self):
    if not self.args:
        self.usage('no git directory args given')
    self.origin = self.get_opt('origin')
    args = uniq_list_ordered(self.args)
    self.branch_prefix = self.get_opt('branch_prefix')
    if self.branch_prefix is not None:
        validate_regex(self.branch_prefix, 'branch prefix')
        self.branch_prefix = re.compile(self.branch_prefix)
    for arg in args:
        if not os.path.exists(arg):
            print("'%s' not found" % arg)
            sys.exit(ERRORS['WARNING'])
        if os.path.isfile(arg):
            log_option('file', arg)
        elif os.path.isdir(arg):
            log_option('directory', arg)
        else:
            die("path '%s' could not be determined as either a file or directory" % arg)
    for arg in args:
        self.check_git_branches_upstream(arg)
    if self.status == "OK":
        log.info('SUCCESS - All Git branches are tracking the expected upstream origin branches')
    else:
        log.critical('FAILED')
        sys.exit(ERRORS['CRITICAL'])

def check_git_branches_upstream(self, target):
    target = os.path.abspath(target)
    gitroot = find_git_root(target)
    if gitroot is None:
        die('Failed to find git root for target {0}'.format(target))
    log.debug("finding branches for target '{0}'".format(target))
    repo = git.Repo(gitroot)
    branches = repo.branches
    if self.branch_prefix is not None:
        log.debug('restricting to branches matching branch prefix')
        branches = [x for x in branches if self.branch_prefix.match(str(x))]
        if not branches:
            log.error("No branches matching '%s' for target '%s'", self.get_opt('branch_prefix'), target)
            self.status = 'NO BRANCHES'
    #if log.isEnabledFor(logging.DEBUG):
    #    log.debug('\n\nbranches for target %s:\n\n%s\n', target, '\n'.join(list(branches)))
    for branch in branches:
        expected = '{0}/{1}'.format(self.origin, branch)
        tracking_branch = str(branch.tracking_branch())
        if tracking_branch == expected:
            log.info("OK: repo '{0}' branch '{1}' is tracking '{2}'"
                     .format(gitroot, branch, tracking_branch))
        else:
            self.status = "ERROR"
            log.error("BAD: branch '{0}' is tracking '{1}' (expected '{2}')"
                      .format(branch, tracking_branch, expected))

def process_args(self):
    # this resets DEBUG env var
    #log.setLevel(logging.INFO)
    self.no_args()
    self.host = self.get_opt('host')
    self.port = self.get_opt('port')
    validate_host(self.host)
    validate_port(self.port)
    self.table = self.get_opt('table')
    self.num_rows = self.get_opt('num')
    self.key_length = self.get_opt('key_length')
    self.value_length = self.get_opt('value_length')
    validate_database_tablename(self.table)
    validate_int(self.num_rows, 'num rows', 1, 1000000000)
    validate_int(self.key_length, 'key length', 10, 1000)
    validate_int(self.value_length, 'value length', 1, 1000000)
    self.num_rows = int(self.num_rows)
    self.skew = self.get_opt('skew')
    log_option('skew data', self.skew)
    self.skew_pc = self.get_opt('skew_percentage')
    validate_int(self.skew_pc, 'skew percentage', 0, 100)
    self.skew_pc = int(self.skew_pc)
    self.drop_table = self.get_opt('drop_table')
    self.use_existing_table = self.get_opt('use_existing_table')
    if self.drop_table and self.table != self.default_table_name:
        die("not allowed to use --drop-table if using a table name other than the default table '{0}'"\
            .format(self.default_table_name))

def compact_table(self, table):
    log.info('major compacting table {0}'.format(table))
    try:
        self.conn.compact_table(table, major=True)
    except socket.timeout as _:
        die('ERROR while trying to compact table \'{0}\': {1}'.format(table, _))
    except thrift.transport.TTransport.TTransportException as _:
        die('ERROR while trying to compact table \'{0}\': {1}'.format(table, _))

def json_to_xml(content, filepath=None):
    try:
        _ = json.loads(content)
    except (KeyError, ValueError) as _:
        file_detail = ''
        if filepath is not None:
            file_detail = ' in file \'{0}\''.format(filepath)
        die("Failed to parse JSON{0}: {1}".format(file_detail, _))
    return dicttoxml.dicttoxml(_)

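# Usage sketch for json_to_xml() (dicttoxml.dicttoxml() returns bytes; die()
# exits the process on a parse failure, as above):
#
#   xml_bytes = json_to_xml('{"name": "test", "value": 1}')
#   # => b'<?xml version="1.0" encoding="UTF-8" ?><root>...</root>'
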
def mac_get_arg(args):
    if not args:
        return ''
    if not isList(args):
        die("non-list '{args}' passed to mac_get_arg()".format(args=args))
    if len(args) > 1:
        die('only one arg is supported on Mac at this time')
    arg = args[0]
    return arg

def check_parquet(self, filename):
    stderr = subprocess.PIPE
    if self.verbose > 2:
        stderr = None
    if not which("parquet-cat"):
        die("parquet-cat not found in $PATH")
    if subprocess.call(["parquet-cat", filename], stdout=subprocess.PIPE, stderr=stderr, shell=False) == 0:
        print(self.valid_parquet_msg)
    else:
        die(self.invalid_parquet_msg)

def xml_to_json(self, content, filepath=None):
    try:
        _ = xmltodict.parse(content)
    except xml.parsers.expat.ExpatError as _:
        file_detail = ''
        if filepath is not None:
            file_detail = ' in file \'{0}\''.format(filepath)
        die("Failed to parse XML{0}: {1}".format(file_detail, _))
    json_string = json.dumps(_, sort_keys=True, indent=self.indent)  #, separators=(',', ': '))
    return json_string

def process_path(self, path):
    if path == '-' or os.path.isfile(path):
        self.process_file(path)
    elif os.path.isdir(path):
        for root, _, files in os.walk(path):
            for filename in files:
                filepath = os.path.join(root, filename)
                if self.re_json_suffix.match(filepath):
                    self.process_file(filepath)
    else:
        die("failed to determine if path '%s' is a file or directory" % path)

def get_clusters(self):
    content = self.get('/clusters')
    clusters = set()
    try:
        _ = json.loads(content)
        for item in _['items']:
            cluster = item['Clusters']['cluster_name']
            clusters.add(cluster)
    except (KeyError, ValueError) as _:
        die('failed to parse cluster name: {0}'.format(_) + support_msg_api())
    return sorted(list(clusters))

def get_services(self):
    content = self.get('/clusters/{cluster}/services'.format(cluster=self.cluster))
    services = set()
    try:
        _ = json.loads(content)
        for item in _['items']:
            service = item['ServiceInfo']['service_name']
            services.add(service)
    except (KeyError, ValueError) as _:
        die('failed to parse services: {0}'.format(_) + support_msg_api())
    return sorted(list(services))

def check_path(self, path):
    if path == '-' or os.path.isfile(path):
        self.check_file(path)
    elif os.path.isdir(path):
        for item in os.listdir(path):
            subpath = os.path.join(path, item)
            if os.path.isdir(subpath):
                self.check_path(subpath)
            elif self.re_csv_suffix.match(item):
                self.check_file(subpath)
    else:
        die("failed to determine if path '%s' is file or directory" % path)

def port_override(self, host):
    port = self.port
    if ':' in host:
        parts = host.split(':')
        if len(parts) == 2:
            port = parts[1]
            if not isPort(port):
                die('error in host definition, not a valid port number: \'{0}\''.format(host))
        else:
            die('error in host definition, contains more than one colon: \'{0}\''.format(host))
        host = parts[0]
    return (host, port)

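# Usage sketch for port_override() (hypothetical values, assuming self.port is
# 8080): a bare host keeps the default port, 'host:port' overrides it (note the
# override comes back as a string sliced out of the host arg), and anything
# with more than one colon dies:
#
#   self.port_override('myhost')        # => ('myhost', 8080)
#   self.port_override('myhost:9090')   # => ('myhost', '9090')
#   self.port_override('myhost:90:90')  # => dies
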
def check_file(self, filename): self.filename = filename if self.filename == '-': self.filename = '<STDIN>' self.valid_ini_msg = '%s => INI OK' % self.filename self.invalid_ini_msg = '%s => INI INVALID' % self.filename if self.filename == '<STDIN>': log.debug('ini stdin') # TODO: should technically write to temp file to be able to seek(0) for print mode self.check_ini(sys.stdin) else: if self.is_excluded(filename): return if not self.is_included(filename): return log.debug('checking %s', self.filename) try: with open(self.filename) as iostream: self.check_ini(iostream) except IOError as _: die("ERROR: %s" % _)
def run(self):
    if self.get_opt('pretty'):
        log_option('pretty', True)
        self.indent = 4
    if not self.args:
        self.args.append('-')
    for arg in self.args:
        if arg == '-':
            continue
        if not os.path.exists(arg):
            print("'%s' not found" % arg)
            sys.exit(ERRORS['WARNING'])
        if os.path.isfile(arg):
            log_option('file', arg)
        elif os.path.isdir(arg):
            log_option('directory', arg)
        else:
            die("path '%s' could not be determined as either a file or directory" % arg)
    for arg in self.args:
        self.process_path(arg)

def run(self):
    if not self.args:
        self.args.append('-')
    args = uniq_list_ordered(self.args)
    for arg in args:
        if arg == '-':
            continue
        if not os.path.exists(arg):
            print("'%s' not found" % arg)
            sys.exit(ERRORS['CRITICAL'])
        if os.path.isfile(arg):
            log_option('file', arg)
        elif os.path.isdir(arg):
            log_option('directory', os.path.abspath(arg))
        else:
            die("path '%s' could not be determined as either a file or directory" % arg)
    for arg in args:
        self.check_path(arg)
    if self.failed:
        sys.exit(ERRORS['CRITICAL'])

def cmd(command):
    log.debug('command: %s', command)
    command_binary = command.split()[0]
    if not which(command_binary):
        die("command '{command}' not found in $PATH".format(command=command_binary))
    try:
        process = subprocess.Popen(command.split(),
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.STDOUT)
        (stdout, _) = process.communicate()
        process.wait()
        log.debug('returncode: %s', process.returncode)
        log.debug('output: %s\n', stdout)
        return (stdout, process.returncode)
    except subprocess.CalledProcessError as _:
        log.debug('CalledProcessError Exception!')
        log.debug('returncode: %s', _.returncode)
        log.debug('output: %s\n', _.output)
        return (_.output, _.returncode)

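# Usage sketch for cmd(): the command string is split on whitespace, so shell
# quoting, pipes and redirection are not supported; stderr comes back merged
# into stdout:
#
#   (output, returncode) = cmd('ls -l /tmp')
#   if returncode != 0:
#       die('command failed: {0}'.format(output))
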
def check_ping(host, count=None, wait=None):
    if count is None:
        count = 1
    if wait is None:
        wait = 3
    if not isInt(count):
        raise UnknownError("passed invalid count '{0}' to check_ping method, must be a valid integer!"\
                           .format(count))
    if not isInt(wait):
        raise UnknownError("passed invalid wait '{0}' to check_ping method, must be a valid integer!"\
                           .format(wait))
    log.info("pinging host '%s' (count=%s, wait=%s)", host, count, wait)
    count_switch = '-c'
    if platform.system().lower() == 'windows':
        count_switch = '-n'
    wait_switch = '-w'
    if platform.system().lower() == 'darwin':
        wait_switch = '-W'
    # causes hang if count / wait are not cast to string
    cmd = ['ping', count_switch, '{0}'.format(count), wait_switch, '{0}'.format(wait), host]
    log.debug('cmd: %s', ' '.join(cmd))
    #log.debug('args: %s', cmd)
    try:
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        #log.debug('communicating')
        (stdout, stderr) = process.communicate()
        #log.debug('waiting for child process')
        process.wait()
        exitcode = process.returncode
        log.debug('stdout: %s', stdout)
        log.debug('stderr: %s', stderr)
        log.debug('exitcode: %s', exitcode)
        if exitcode == 0:
            log.info("host '%s' responded to ping", host)
            return host
    except subprocess.CalledProcessError as _:
        log.warn('ping failed: %s', _.output)
    except OSError as _:
        die('error calling ping: {0}'.format(_))
    return None

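# Usage sketch for check_ping(): it returns the host on success and None
# otherwise, which makes it convenient as a filter, e.g.:
#
#   live_hosts = [host for host in hosts if check_ping(host, count=2, wait=1)]
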
def run(self):
    args = self.process_args()
    for arg in args:
        if not os.path.exists(arg):
            _ = "'%s' not found" % arg
            if self.skip_errors:
                print(_)
                self.failed = True
            else:
                die(_)
        if os.path.isfile(arg):
            log_option('file', arg)
        elif os.path.isdir(arg):
            log_option('directory', arg)
        else:
            die("path '%s' could not be determined as either a file or directory" % arg)
    for arg in args:
        try:
            self.check_path(arg)
        except OSError as _:
            if self.skip_errors:
                print(_)
                self.failed = True
            else:
                die(_)
    if self.failed:
        sys.exit(2)

def run(self):
    tables = self.get_tables()
    if not tables:
        die('No Tables Found')
    if self.get_opt('list_tables'):
        print('Tables:\n\n' + '\n'.join(tables))
        sys.exit(3)
    tables_to_flush = set()
    if self.table_regex:
        log.info('filtering tables based on regex')
        for table in sorted(list(tables)):
            if self.table_regex.search(table):
                tables_to_flush.add(table)
    else:
        tables_to_flush = sorted(list(tables))
    if log.isEnabledFor(logging.INFO):
        log.info('Flushing tables:\n\n%s\n', '\n'.join(tables_to_flush))
    flush_commands = '\n'.join(["flush '{0}'".format(table) for table in tables_to_flush])
    try:
        # by having stdout and stderr go to the same place more likely the output will be in a sane order
        process = subprocess.Popen(['hbase', 'shell'], stdin=PIPE, stdout=PIPE, stderr=subprocess.STDOUT)
        (stdout, _) = process.communicate(input=flush_commands)
        process.wait()
        if process.returncode != 0:
            print('ERROR:', end='')
            die(stdout)
        print(stdout)
    except OSError as _:
        die("OSError running hbase shell to flush tables: {0}".format(_))
    except subprocess.CalledProcessError as _:
        print('Failed to flush tables using HBase shell:\n')
        print(_.output)
        sys.exit(_.returncode)

def check_git_branches_dockerfiles(self, target): gitroot = find_git_root(target) if gitroot is None: die('Failed to find git root for target {0}'.format(target)) log.debug("finding branches for target '{0}'".format(target)) repo = git.Repo(gitroot) #branches = [str(x) for x in repo.refs if isinstance(x, git.refs.remote.RemoteReference)] branches = [str(x) for x in repo.refs if isinstance(x, git.Head)] branches = [x.split('/')[-1] for x in branches] branches = set(branches) branches = [x for x in branches if x not in ('HEAD', 'master')] self.branches = branches if self.branch_prefix is not None: log.debug('restricting to branches matching branch prefix') branches = [x for x in branches if self.branch_prefix.match(x)] self.selected_branches = branches #if log.isEnabledFor(logging.DEBUG): log.debug('\n\nbranches for target %s:\n\n%s\n', target, '\n'.join(branches)) original_checkout = 'master' try: try: original_checkout = repo.active_branch.name except TypeError as _: pass for branch in branches: log.debug("checking branch '%s' Dockerfiles for target '%s'", branch, target) self.branches_checked += 1 try: repo.git.checkout(branch) except git.exc.GitCommandError as _: die(_) self.check_path(target, branch) except Exception as _: # pylint: disable=broad-except traceback.print_exc() sys.exit(1) finally: log.debug("returning to original checkout '%s'", original_checkout) repo.git.checkout(original_checkout)
def check_selenium(self, browser):
    log.info("Connecting to '%s' for browser '%s'", self.hub_url, browser)
    driver = webdriver.Remote(command_executor=self.hub_url,
                              desired_capabilities=getattr(DesiredCapabilities, browser))
    log.info("Checking url '%s'", self.url)
    driver.get(self.url)
    content = driver.page_source
    title = driver.title
    driver.quit()
    if self.expected_regex:
        log.info("Checking url content matches regex")
        if not self.expected_regex.search(content):
            die('ERROR: Page source content failed regex search')
    elif self.expected_content:
        log.info("Checking url content matches '%s'", self.expected_content)
        if self.expected_content not in content:
            die('ERROR: Page source content failed content match')
    # not really recommended but in this case we cannot predict
    # what to expect on a random url if not specified by --content/--regex (provided in the default test case)
    #
    # https://www.selenium.dev/documentation/en/worst_practices/http_response_codes/
    elif '404' in title:
        die('ERROR: Page title contains a 404 / error ' +
            '(if this is expected, specify --content / --regex to check instead): {}'.format(title))
    log.info("Succeeded for browser '%s' against url '%s'", browser, self.url)

def print_results(self, term, limit=None):
    data = self.search(term, limit)
    results = {}
    longest_name = 8
    try:
        # collect in dict to order by stars like normal docker search command
        for item in data['results']:
            star = item['star_count']
            name = item['name']
            if len(name) > longest_name:
                longest_name = len(name)
            if not isInt(star):
                die("star count '{0}' for repo '{1}' is not an integer! {2}"
                    .format(star, name, support_msg_api()))
            results[star] = results.get(star, {})
            results[star][name] = results[star].get(name, {})
            result = {}
            result['description'] = item['description']
            result['official'] = '[OK]' if item['is_official'] else ''
            # docker search doesn't output this so neither will I
            #result['trusted'] = result['is_trusted']
            result['automated'] = '[OK]' if item['is_automated'] else ''
            results[star][name] = result
        # mimicking output spacing from 'docker search' command
        if not self.quiet:
            print('{0:{5}s} {1:45s} {2:7s} {3:8s} {4:10s}'.format(
                'NAME', 'DESCRIPTION', 'STARS', 'OFFICIAL', 'AUTOMATED', longest_name))
    except KeyError as _:
        die('failed to parse results fields from data returned by DockerHub ' +
            '(format may have changed?): {0}'.format(_))
    except IOError as _:
        if str(_) == '[Errno 32] Broken pipe':
            pass
        else:
            raise

    def truncate(mystr, length):
        if len(mystr) > length:
            mystr = mystr[0:length - 3] + '...'
        return mystr

    for star in reversed(sorted(results)):
        for name in sorted(results[star]):
            if self.quiet:
                print(name.encode('utf-8'))
            else:
                desc = truncate(results[star][name]['description'], 45)
                print('{0:{5}s} {1:45s} {2:<7d} {3:8s} {4:10s}'.format(
                    name.encode('utf-8'), desc.encode('utf-8'), star,
                    results[star][name]['official'], results[star][name]['automated'],
                    longest_name))
    if self.verbose and not self.quiet:
        try:
            print('\nResults Shown: {0}\nTotal Results: {1}'.format(
                len(data['results']), data['num_results']))
        except KeyError as _:
            die('failed to parse total results count from data returned by DockerHub ' +
                '(format may have changed?): {0}'.format(_))

def get_tables(self):
    log.info('getting table list')
    try:
        process = subprocess.Popen(['hbase', 'shell'], stdin=PIPE, stdout=PIPE, stderr=subprocess.STDOUT)
        (stdout, _) = process.communicate('list')
        process.wait()
        if process.returncode != 0:
            print('ERROR:', end='')
            die(stdout)
        lines = stdout.split('\n')
        lineno = 1
        for line in lines:
            if self.table_list_header_regex.search(line):
                break
            lineno += 1
        if lineno > len(lines):
            die("Failed to parse table list output (couldn't find the starting line TABLE)")
        tables = set()
        for line in lines[lineno:]:
            if self.table_list_end_regex.search(line):
                break
            line = line.strip()
            if not line:
                continue
            tables.add(line)
        return tables
    except OSError as _:
        die("OSError running hbase shell to list tables: {0}".format(_))
    except subprocess.CalledProcessError as _:
        print('Failed to get tables using HBase shell:\n')
        print(_.output)
        sys.exit(_.returncode)

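# The two regexes used above are class attributes not shown in this extract; a
# plausible pair matching 'hbase shell' list output (table names are listed
# under a 'TABLE' header and terminated by an 'N row(s)' summary line) would be:
#
#   table_list_header_regex = re.compile(r'^TABLE')
#   table_list_end_regex = re.compile(r'row\(s\)')
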
def run(self):
    if not self.args:
        self.usage('no Dockerfile / directory args given')
    args = uniq_list_ordered(self.args)
    self.branch_prefix = self.get_opt('branch_prefix')
    if self.branch_prefix is not None:
        validate_regex(self.branch_prefix, 'branch prefix')
        self.branch_prefix = re.compile(self.branch_prefix)
    for arg in args:
        if not os.path.exists(arg):
            print("'%s' not found" % arg)
            sys.exit(ERRORS['WARNING'])
        if os.path.isfile(arg):
            log_option('file', arg)
        elif os.path.isdir(arg):
            log_option('directory', arg)
        else:
            die("path '%s' could not be determined as either a file or directory" % arg)
    for arg in args:
        self.check_git_branches_dockerfiles(arg)
    branches_skipped = len(self.branches_skipped)
    if branches_skipped > 0:
        log.warn('{0} branches skipped for not matching expected naming format'
                 .format(branches_skipped))
    log.info('{0} Dockerfiles checked across {1} branches'
             .format(len(self.dockerfiles_checked), self.branches_checked))
    branches_failed = len(self.branches_failed)
    _ = '{0} Dockerfiles failed validation across {1} branches'.format(self.dockerfiles_failed, branches_failed)
    if branches_failed > 0:
        log.error(_)
    else:
        log.info(_)
    if self.failed:
        log.error('Dockerfile validation FAILED')
        sys.exit(ERRORS['CRITICAL'])
    log.info('Dockerfile validation SUCCEEDED')

def check_file(self, filename): if filename == '-': filename = '<STDIN>' self.valid_parquet_msg = '%s => Parquet OK' % filename self.invalid_parquet_msg = '%s => Parquet INVALID' % filename if filename == '<STDIN>': try: tmp = tempfile.NamedTemporaryFile() log.debug('created tmp file from stdin: %s', tmp.name) tmp.write(sys.stdin.read()) tmp.seek(0) self.check_parquet(tmp.name) tmp.close() except IOError as _: die("ERROR: %s" % _) else: if self.is_excluded(filename): return try: self.check_parquet(filename) except IOError as _: die("ERROR: %s" % _)
def mac_getent_group_name(self, group):
    log.info('mac_getent_group_name(%s)', group)
    command = 'dscl . -read /Groups/{group}'.format(group=group)
    (output, returncode) = self.cmd(command)
    gid = password = name = members = ''
    #log.info('parsing output for group conversion')
    output = output.split('\n')
    for index, line in enumerate(output):
        tokens = line.split()
        if len(tokens) < 1:
            continue
        field = tokens[0]
        if len(tokens) < 2:
            value = ''
        else:
            value = tokens[1]
        if field == 'PrimaryGroupID:':
            gid = value
        elif field == 'Password:':
            # this assignment was redacted in the source ('******'); reconstructed on
            # the assumption that dscl's '*' placeholder is mapped to getent-style 'x'
            password = value
            if password == '*':
                password = 'x'
        elif field == 'RealName:':
            name = value
            if not value and len(output) > index + 1 and output[index + 1].startswith(' '):
                name = output[index + 1].strip()
        elif not name and field == 'RecordName:':
            name = value
        elif field == 'GroupMembership:':
            members = ','.join(tokens[1:])
    if not gid:
        return ('', returncode)
    getent_record = '{gid}:{password}:{name}:{members}'.format(
        gid=gid, password=password, name=name, members=members)
    if not isInt(gid, allow_negative=True):
        die("parsing error: GID '{gid}' is not numeric in record {record}!"
            .format(gid=gid, record=getent_record))
    return (getent_record, returncode)

def cancel_service_checks(self):
    log.info('cancelling all requests matching service check context')
    request_ids = self.get_request_ids()
    #re_context = re.compile(r'.+ Service Check \(batch \d+ of \d+\)', re.I)
    cancel_payload = '{"Requests":{"request_status":"ABORTED","abort_reason":"Aborted by user"}}'
    for request_id in request_ids:
        content = self.get('/clusters/{cluster}/requests/{request_id}'
                           .format(cluster=self.cluster, request_id=request_id))
        try:
            _ = json.loads(content)
            request_context = _['Requests']['request_context']
            if 'Service Check' in request_context:
                log.info('cancelling request_id %s (%s)', request_id, request_context)
                self.put('/clusters/{cluster}/requests/{request_id}'
                         .format(cluster=self.cluster, request_id=request_id),
                         data=cancel_payload)
        except (KeyError, ValueError) as _:
            die('failed to parse response for request_id {0}. '.format(request_id) + support_msg_api())

def request_service_checks(self, services):
    log.debug('requesting service checks for services: %s', services)
    if not isList(services):
        code_error('non-list passed to request_service_checks')
    url_suffix = '/clusters/{cluster}/request_schedules'.format(cluster=self.cluster)
    payload = self.gen_payload(services)
    log.info('sending batch schedule check request for services: ' + ', '.join(services))
    content = self.post(url_suffix=url_suffix, data=payload)
    try:
        _ = json.loads(content)
        request_schedule_id = _['resources'][0]['RequestSchedule']['id']
        log.info('RequestSchedule %s submitted', request_schedule_id)
        href = _['resources'][0]['href']
        assert href == self.url_base.rstrip('/') + \
            '/clusters/{0}/request_schedules/{1}'.format(self.cluster, request_schedule_id)
        if self.watch:
            self.watch_scheduled_request(request_schedule_id)
    except (KeyError, ValueError) as _:
        die('parsing schedule request response failed: ' + str(_) + '. ' + support_msg_api())

def check_file(self, filename): if filename == '-': filename = '<STDIN>' self.valid_ldif_msg = '%s => LDIF OK' % filename self.invalid_ldif_msg = '%s => LDIF INVALID' % filename if filename == '<STDIN>': log.debug('checking <STDIN>') #self.check_ldif(sys.stdin.read()) self.check_ldif(sys.stdin) else: if self.is_excluded(filename): return try: log.debug("checking '%s'", filename) with open(filename, 'rb') as iostream: #content = iostream.read() #self.check_ldif(content) self.check_ldif(iostream) except IOError as _: die("ERROR: %s" % _) if self.failed: sys.exit(2)
def process_file(self, filename, file_handle):
    for line in file_handle:
        #log.debug(line)
        match = self.re_line.match(line)
        if not match:
            err_msg = "ERROR in file '{0}' on line: {1}".format(filename, line)
            if not self.skip_errors:
                die(err_msg)
            printerr()
            log.warn(err_msg)
            continue
        metric = match.group(1)
        timestamp = match.group(2)
        # don't have a need for this right now
        #value = match.group(3)
        tags = match.group(4)
        key = metric
        if self.include_timestamps:
            timestamp = int(timestamp)
            # remove millis
            if len(str(timestamp)) >= 15:
                timestamp = round(timestamp / 1000)
            hour = time.strftime('%Y-%m-%d %H:00', time.gmtime(timestamp))
            key += ' ' + hour
        for tag in sorted(tags.split()):
            key += ' ' + tag.strip()
        if self.prefix_length is None:
            prefix = key
        else:
            prefix = key[0:min(self.prefix_length, len(key))]
        #prefix = self.bytes_to_str(prefix)
        if not self.keys.get(prefix):
            self.keys[prefix] = {'count': 0}
        self.keys[prefix]['count'] += 1
        self.total_keys += 1
        if self.verbose < 2 and self.total_keys % 10000 == 0:
            print('.', file=sys.stderr, end='')

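# self.re_line is defined elsewhere; judging from the group numbers used above
# (1 = metric, 2 = timestamp, 3 = value, 4 = tags) it is assumed to match
# OpenTSDB-style text export lines of the form
# 'metric timestamp value tag1=v1 tag2=v2 ...', e.g.:
#
#   re_line = re.compile(r'^(\S+)\s+(\d+)\s+(\S+)\s+(.*)$')
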
def check_path(self, path, branch):
    status = True
    (branch_base, _) = self.branch_version(branch)
    branch_normalized_name = self.normalize_name(branch_base)
    log.debug('branch normalized name: %s', branch_normalized_name)
    if os.path.isfile(path):
        return self.check_file(path, branch)
    elif os.path.isdir(path):
        if os.path.basename(path) == '.git':
            return True
        for item in os.listdir(path):
            subpath = os.path.join(path, item)
            if os.path.islink(subpath):
                subpath = os.path.realpath(subpath)
            if os.path.isdir(subpath):
                subpath_base = os.path.basename(subpath)
                #log.debug('subpath_base = %s', subpath_base)
                if self.normalize_name(subpath_base) == branch_normalized_name:
                    if not self.check_path(subpath, branch):
                        status = False
            elif os.path.isfile(subpath):
                if not self.check_file(subpath, branch):
                    status = False
            elif not os.path.exists(subpath):
                log.debug("subpath '%s' does not exist in branch '%s', skipping..." % (subpath, branch))
            else:
                die("failed to determine if subpath '%s' is file or directory in branch '%s'" % (subpath, branch))
    elif not os.path.exists(path):
        log.debug("path '%s' does not exist in branch '%s', skipping..." % (path, branch))
    else:
        die("failed to determine if path '%s' is file or directory in branch '%s'" % (path, branch))
    return status

def check(self, filename):
    if filename == '-':
        filename = '<STDIN>'
    self.filename = filename
    single_quotes = '(found single quotes not double quotes)'
    self.valid_json_msg_single_quotes = '{0} {1}'.format(self.valid_json_msg, single_quotes)
    self.invalid_json_msg_single_quotes = '{0} {1}'.format(self.invalid_json_msg, single_quotes)
    if filename == '<STDIN>':
        self.iostream = sys.stdin
        if self.get_opt('multi_record'):
            if not self.check_multirecord_json():
                self.failed = True
                self.msg = self.invalid_json_msg
                if not self.passthru:
                    die(self.msg)
        else:
            self.check_json(sys.stdin.read())
    else:
        self.check_file(filename)
    if self.failed:
        sys.exit(2)

def parse_scheduled_request(content):
    try:
        _ = json.loads(content)
        if _['RequestSchedule']['last_execution_status'] == 'COMPLETED':
            log.info('COMPLETED')
            return 'COMPLETED'
        for item in _['RequestSchedule']['batch']['batch_requests']:
            request_status = 'NO STATUS YET'
            if 'request_status' in item:
                request_status = item['request_status']
            if request_status == 'COMPLETED':
                continue
            request_body = item['request_body']
            request_body_dict = json.loads(request_body)
            command = request_body_dict['RequestInfo']['command']
            context = request_body_dict['RequestInfo']['context']
            log.info('{request_status}: {command}: {context}'.format(
                request_status=request_status, command=command, context=context))
            if request_status != 'ABORTED':
                return 'IN_PROGRESS'
    except (KeyError, ValueError) as _:
        die('parsing schedule request status failed: ' + str(_) + '. ' + support_msg_api())

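# Sketch of a polling caller (hypothetical, not from this extract) that drives
# parse_scheduled_request() until the schedule completes:
#
#   while True:
#       content = self.get('/clusters/{cluster}/request_schedules/{req_id}'
#                          .format(cluster=self.cluster, req_id=request_schedule_id))
#       if parse_scheduled_request(content) == 'COMPLETED':
#           break
#       time.sleep(5)
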
def populate_row_counts(self, table_conn):
    if not self.conn.is_table_enabled(self.table):
        die("table '{0}' is not enabled".format(self.table))
    log.info('getting row counts (this may take a long time)')
    #rows = table_conn.scan(columns=[])
    rows = table_conn.scan()  # columns=[]) doesn't return without cf
    if self.verbose < 2:
        print('progress dots (one per 10,000 rows): ', file=sys.stderr, end='')
    for row in rows:
        #log.debug(row)
        key = row[0]
        prefix = key[0:min(self.prefix_length, len(key))]
        prefix = self.bytes_to_str(prefix)
        if not self.rows.get(prefix):
            self.rows[prefix] = {'row_count': 0}
        self.rows[prefix]['row_count'] += 1
        self.total_rows += 1
        if self.verbose < 2 and self.total_rows % 10000 == 0:
            print('.', file=sys.stderr, end='')
    if self.verbose < 2:
        print(file=sys.stderr)

def run(self):
    # might have to use compat / transport / protocol args for older versions of HBase or if protocol has been
    # configured to be non-default, see:
    # http://happybase.readthedocs.io/en/stable/api.html#connection
    try:
        log.info('connecting to HBase Thrift Server at %s:%s', self.host, self.port)
        self.conn = happybase.Connection(host=self.host, port=self.port, timeout=10 * 1000)  # ms
        tables = self.get_tables()
        if self.get_opt('list_tables'):
            print('Tables:\n\n' + '\n'.join(tables))
            sys.exit(3)
        if self.table not in tables:
            die("HBase table '{0}' does not exist!".format(self.table))
        table_conn = self.conn.table(self.table)
        self.populate_row_counts(table_conn)
        self.calculate_row_count_widths()
        self.calculate_row_percentages()
        self.print_table_row_prefix_counts()
        self.print_summary()
        log.info('finished, closing connection')
        self.conn.close()
    except (socket.timeout, ThriftException, HBaseIOError) as _:
        die('ERROR: {0}'.format(_))

def check_file(self, filename): self.filename = filename if self.filename == '-': self.filename = '<STDIN>' self.valid_toml_msg = '%s => TOML OK' % self.filename self.invalid_toml_msg = '%s => TOML INVALID' % self.filename if self.filename == '<STDIN>': log.debug('toml stdin') self.check_toml(sys.stdin) else: if self.is_excluded(filename): return if not self.is_included(filename): return log.debug('checking %s', self.filename) try: if self.check_toml(filename): print(self.valid_toml_msg) else: print(self.invalid_toml_msg) sys.exit(2) except IOError as _: die("ERROR: %s" % _)
def local_main(self, table_conn):
    self.no_region_col = self.get_opt('no_region_name')
    self.sort = self.get_opt('sort')
    self.sort_desc = self.get_opt('desc')
    if self.sort is not None:
        self.sort = self.sort.lower()
        if self.sort not in self.valid_sorts:
            self.usage('invalid --sort option given, must be one of: {0}'.format(', '.join(self.valid_sorts)))
    log_option('no region name', self.no_region_col)
    log_option('sort', self.sort)
    if self.no_region_col:
        self.total_width -= self.region_width
    num_regions = len(self._regions)
    # sanity check and protect against division by zero in summary stats
    if num_regions < 1:
        die('number of regions detected = {0:d} (< 1)'.format(num_regions))
    self.populate_region_metadata()
    self.calculate_widths()
    self.populate_row_counts(table_conn)
    self.calculate_row_count_widths()
    self.calculate_row_percentages()
    self.print_table_region_row_counts()
    self.print_summary()

def query(self, url):
    log.debug('GET %s' % url)
    try:
        verify = True
        # workaround for Travis CI and older pythons - we're not exchanging secret data so this is ok
        #if os.getenv('TRAVIS'):
        #    verify = False
        if os.getenv('SSL_NOVERIFY') == '1':
            log.warn('disabling SSL verification')
            verify = False
        auth = None
        if self.user and self.password:
            auth = (self.user, self.password)
            log.debug('setting basic HTTP authentication using username: %s, password: <omitted>', self.user)
        req = requests.get(url, auth=auth, verify=verify)
    except requests.exceptions.RequestException as _:
        die(_)
    log.debug("response: %s %s", req.status_code, req.reason)
    log.debug("content:\n%s\n%s\n%s", '=' * 80, req.content.strip(), '=' * 80)
    if req.status_code != 200:
        die("%s %s" % (req.status_code, req.reason))
    if not isJson(req.content):
        die('invalid non-JSON response from Docker Registry!')
    if log.isEnabledFor(logging.DEBUG):
        print(jsonpp(req.content))
        print('=' * 80)
    tag_list = []
    try:
        json_data = json.loads(req.content)
        # DockerHub returns like this
        if 'results' in json_data:
            tag_list = [result['name'] for result in json_data['results']]
        # Docker Registry returns like this
        elif 'tags' in json_data:
            tag_list = json_data['tags']
        else:
            raise UnknownError('failed to parse response, found neither results nor tags fields. {0}'\
                               .format(support_msg_api()))
        # could perhaps stack overflow in some scenario
        # not as functional programming 'cool' but will do own tail recursion and just while loop instead
        next_page_url = None
        if 'next' in json_data and json_data['next']:
            #tag_list += self.query(json_data['next'])
            next_page_url = json_data['next']
        return (tag_list, next_page_url)
    except KeyError as _:
        die('failed to parse output from Docker Registry (format may have changed?): {0}'.format(_))

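# Sketch of the while-loop driver the tail-recursion comment above alludes to
# (hypothetical method name): accumulate tags page by page until the registry
# stops returning a 'next' URL:
#
#   def get_tags(self, url):
#       tags = []
#       while url:
#           (tag_list, url) = self.query(url)
#           tags += tag_list
#       return tags
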
def check_dockerfile_arg(self, filename, tag):
    log.debug('check_dockerfile_arg({0}, {1})'.format(filename, tag))
    tag_base = str(tag).replace('-dev', '')
    (tag_base, tag_version) = tag_base.rsplit('-', 1)
    log.debug('tag_base = {0}'.format(tag_base))
    log.debug('tag_version = {0}'.format(tag_version))
    with open(filename) as filehandle:
        for line in filehandle:
            #log.debug(line.strip())
            argversion = self.arg_regex.match(line.strip())
            if argversion:
                log.debug("found arg '%s'", argversion.group(0))
                log.debug("checking arg group 1 '%s' == tag_base '%s'", argversion.group(1), tag_base)
                if argversion.group(1).lower() == tag_base.lower().replace('-', '_'):
                    log.debug("arg '%s' matches tag base '%s'", argversion.group(1), tag_base)
                    log.debug("comparing '%s' contents to version derived from tag '%s' => '%s'",
                              filename, tag, tag_version)
                    if not isVersion(tag_version):
                        die("unrecognized tag version '{0}' for tag_base '{1}'"
                            .format(tag_version, tag_base))
                    found_version = argversion.group(2)
                    #if tag_version == found_version or tag_version == found_version.split('.', 1)[0]:
                    if found_version[0:len(tag_version)] == tag_version:
                        log.info("{0} (tag version '{1}' matches arg version '{2}')"
                                 .format(self.valid_git_tags_msg, tag_version, found_version))
                        return True
                    log.error('{0} ({1} tag vs {2} Dockerfile ARG)'.format(
                        self.invalid_git_tags_msg, tag_version, found_version))
                    return False
    return True

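# self.arg_regex is defined elsewhere in the class; a plausible definition
# consistent with the group(1)/group(2) usage above (ARG name, then version)
# would be:
#
#   arg_regex = re.compile(r'^\s*ARG\s+(\w+)_VERSION=([\w.]+)\s*$')
#
# which would match a Dockerfile line such as:  ARG ZOOKEEPER_VERSION=3.4.8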