Example #1
 def check_path(self, path):
     if os.path.isfile(path):
         # files given explicitly are checked regardless
         # if self.regex and self.regex.search(path):
         self.check_media_file(path)
     elif os.path.isdir(path):
         listing = []
         try:
             listing = os.listdir(path)
             listing = [x for x in listing if x[0] != '.']
         except OSError as _:
             if self.skip_errors:
                 print(_)
                 self.failed = True
             else:
                 die(_)
         for item in listing:
             subpath = os.path.join(path, item)
             if os.path.isdir(subpath):
                 self.check_path(subpath)
             elif self.regex:
                 if self.regex.search(item):
                     self.check_media_file(subpath)
             elif self.re_media_suffix.match(item):
                 self.check_media_file(subpath)
     else:
         die("failed to determine if path '%s' is file or directory" % path)
Example #2
 def check_multirecord_json(self):
     log.debug('check_multirecord_json()')
     for line in self.iostream:
         if isJson(line):
             # can't use self.print() here, don't want to print valid for every line of a file / stdin
             if self.passthru:
                 print(line, end='')
         elif isJson(line.replace("'", '"')):
             if self.permit_single_quotes:
                 log.debug('valid multirecord json (single quoted)')
                 # self.single_quotes_detected = True
                 if self.passthru:
                     print(line, end='')
             else:
                 log.debug('invalid multirecord json (single quoted)')
                 self.failed = True
                 if not self.passthru:
                     die('%s (multi-record format)' % self.invalid_json_msg_single_quotes)
                 return False
         else:
             log.debug('invalid multirecord json')
             self.failed = True
             return False
     # self.multi_record_detected = True
     log.debug('multirecord json (all lines passed)')
     if not self.passthru:
         print('%s (multi-record format)' % self.valid_json_msg)
     return True
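check_multirecord_json() validates input line by line (JSON-per-line, a.k.a. JSONL) and leans on an isJson() helper defined elsewhere. A minimal sketch, assuming the helper simply attempts a parse:

    import json

    def isJson(arg):
        # True if the string parses as JSON, False otherwise
        try:
            json.loads(arg)
            return True
        except ValueError:
            return False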
Example #3
    def run(self):
        self.no_args()
        json_file = self.options.json
        avro_dir = self.options.avro_dir
        # let Spark fail if json/avro dir aren't available
        # can't check the paths exist since we want to remain generically portable
        # to HDFS, local filesystem or any other URI scheme Spark supports
        log.info("Json Source: %s" % json_file)
        log.info("Avro Destination: %s" % avro_dir)

        conf = SparkConf().setAppName('HS PySpark Json => Avro')
        sc = SparkContext(conf=conf) # pylint: disable=invalid-name
        sqlContext = SQLContext(sc)  # pylint: disable=invalid-name
        spark_version = sc.version
        log.info('Spark version detected as %s' % spark_version)

        if not isVersionLax(spark_version):
            die("Spark version couldn't be determined. " + support_msg('pytools'))

        #  pylint: disable=invalid-name
        df = None
        if isMinVersion(spark_version, 1.4):
            df = sqlContext.read.json(json_file)
        else:
            die('Spark <= 1.3 is not supported due to avro dependency, sorry! ' + \
                'I may change this on request but prefer people just upgrade')
            # log.warn('running legacy code for Spark <= 1.3')
            #json = sqlContext.jsonFile(json_file)
        # this doesn't work in Spark <= 1.3 and the github docs don't mention the older methods for writing avro using
        # the databricks avro driver
        df.write.format('com.databricks.spark.avro').save(avro_dir)
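The Avro writer here comes from the external Databricks connector, so the job must be launched with that package on the classpath. For reference, on Spark 2.4+ the Avro source ships with Spark itself (still as a separate spark-avro module) and the format name shortens; a hedged sketch of the modern equivalent of the last line:

    # Spark 2.4+ equivalent, assuming the org.apache.spark:spark-avro package is on the classpath
    df.write.format('avro').save(avro_dir)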
Example #4
 def check_media_file(self, filename):
     valid_media_msg = '%s => OK' % filename
     invalid_media_msg = '%s => INVALID' % filename
     try:
         # cmd = self.validate_cmd.format(filename)
         cmd = self.validate_cmd
         log.debug('cmd: %s %s', cmd, filename)
         log.info('verifying {0}'.format(filename))
         # capturing stderr to stdout because ffprobe prints to stderr in all cases
         # Python 2.7+
         #subprocess.check_output(cmd.split() + [filename], stderr=subprocess.STDOUT)
         proc = subprocess.Popen(cmd.split() + [filename], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
         (stdout, _) = proc.communicate()
         returncode = proc.wait()
         if returncode != 0 or (stdout is not None and 'Error' in stdout):
             _ = CalledProcessError(returncode, cmd)
             _.output = stdout
             raise _
         print(valid_media_msg)
     except CalledProcessError as _:
         if self.verbose > 2:
             print(_.output)
         if self.skip_errors:
             print(invalid_media_msg)
             self.failed = True
             return False
         die(invalid_media_msg)
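self.validate_cmd is configured elsewhere in the class; given the ffprobe comment above, a plausible value (an assumption, not confirmed by this snippet) would be:

    # hypothetical default -- '-v error' keeps ffprobe quiet unless it finds genuine problems
    validate_cmd = 'ffprobe -v error'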
Example #5
 def connection(self, host, port, user, password, ssl=False, **kwargs):
     # must set X-Requested-By in newer versions of Ambari
     self.x_requested_by = user
     if user == 'admin':
         self.x_requested_by = os.getenv('USER', user)
     #log.info("contacting Ambari as '%s'" % self.user)
     if not isHost(host) or not isPort(port) or not isUser(user) or not password:
         raise InvalidOptionException('invalid options passed to AmbariBlueprint()')
     proto = 'http' # pylint: disable=unused-variable
     if ssl:
         proto = 'https'
     self.host = host
     self.port = port
     self.user = user
     self.password = password
     # if kwargs.has_key('strip_config') and kwargs['strip_config']:
     if 'strip_config' in kwargs and kwargs['strip_config']:
         self.strip_config = True
     self.url_base = '%(proto)s://%(host)s:%(port)s/api/v1' % locals()
     if 'dir' in kwargs and kwargs['dir']:
         self.blueprint_dir = kwargs['dir']
     if not isDirname(self.blueprint_dir):
         qquit('UNKNOWN', 'invalid dir arg passed to AmbariBlueprintTool')
     try:
         if not self.blueprint_dir or not os.path.exists(self.blueprint_dir):
             log.info("creating blueprint data dir '%s'" % self.blueprint_dir)
             os.mkdir(self.blueprint_dir)
         if not os.path.isdir(self.blueprint_dir):
             raise IOError("blueprint dir '%s'already taken and is not a directory" % self.blueprint_dir)
     except IOError as _:
         die("'failed to create dir '%s': %s" % (self.blueprint_dir, _))
Example #6
 def check_path(self, path):
     # os.walk returns nothing if path is a file, and must store file names, sizes, checksums and regex captures
     # even for standalone file args
     if os.path.isfile(path):
         self.check_file(path)
     elif os.path.isdir(path):
         # returns generator
         # root is the dir, dirs and files are child basenames
         #for root, dirs, files in os.walk(path):
         for root, dirs, files in os.walk(path):
             #log.debug('root = %s', root)
             #log.debug('files = %s', files)
             # do not check hidden subdirs
             if not self.include_dot_dirs:
                 # results in 'IndexError: string index out of range' if suffixed with '/'
                 # if os.path.basename(root)[0] == '.':
                 #    continue
                 # could regex strip all suffixed '/' but it's cheaper to just modify the dirs list in place
                 dirs[:] = [d for d in dirs if d[0] != '.']
             for filebasename in files:
                 filepath = os.path.join(root, filebasename)
                 try:
                     self.is_file_dup(filepath)
                 except OSError as exc:
                     log.error("error while checking file '{0}': {1}".format(filepath, exc))
                     self.failed = True
     else:
         die("'%s' is not a file or directory")
Example #7
 def run(self):
     self.num_lines = self.options.num
     vlog_option('number of lines', self.num_lines)
     self.quiet = self.options.quiet
     vlog_option('quiet', self.quiet)
     if not self.args:
         self.args.append('-')
     for arg in self.args:
         if arg == '-':
             continue
         if not os.path.exists(arg):
             print("'%s' not found" % arg)
             sys.exit(ERRORS['WARNING'])
         if os.path.isfile(arg):
             vlog_option('file', arg)
         elif os.path.isdir(arg):
             vlog_option('directory', arg)
         else:
             die("path '%s' could not be determined as either a file or directory" % arg)
     for filename in self.args:
         if filename == '-':
             self.headtail(sys.stdin.read())
         else:
             with open(filename) as _:
                 self.headtail(_.read())
         if not self.quiet and len(self.args) > 1:
             print(self.docsep)
 def check_path(self, path, branch):
     status = True
     (branch_base, _) = self.branch_version(branch)
     if os.path.isfile(path):
         return self.check_file(path, branch)
     elif os.path.isdir(path):
         if os.path.basename(path) == '.git':
             return True
         for item in os.listdir(path):
             subpath = os.path.join(path, item)
             if os.path.islink(subpath):
                 subpath = os.path.realpath(subpath)
             if os.path.isdir(subpath):
                 subpath_base = os.path.basename(subpath)
                 #log.debug('subpath_base = %s', subpath_base)
                 if self.normalize_name(subpath_base) == self.normalize_name(branch_base):
                     if not self.check_path(subpath, branch):
                         status = False
             elif os.path.isfile(subpath):
                 if not self.check_file(subpath, branch):
                     status = False
             elif not os.path.exists(subpath):
                 log.debug("subpath '%s' does not exist in branch '%s', skipping..." % (subpath, branch))
             else:
                 die("failed to determine if subpath '%s' is file or directory in branch '%s'" % (subpath, branch))
     elif not os.path.exists(path):
         log.debug("path '%s' does not exist in branch '%s', skipping..." % (path, branch))
     else:
         die("failed to determine if path '%s' is file or directory in branch '%s'" % (path, branch))
     return status
 def check_path(self, path, tag):
     status = True
     if os.path.isfile(path):
         return self.check_file(path, tag)
     elif os.path.isdir(path):
         if os.path.basename(path) == ".git":
             return True
         for item in os.listdir(path):
             subpath = os.path.join(path, item)
             if os.path.islink(subpath):
                 subpath = os.path.realpath(subpath)
             if os.path.isdir(subpath):
                 tag_base = tag.rsplit("-", 1)[0]
                 subpath_base = os.path.basename(subpath)
                 # log.debug('tag_base = %s', tag_base)
                 # log.debug('subpath_base = %s', subpath_base)
                 if subpath_base == tag_base:
                     if not self.check_path(subpath, tag):
                         status = False
             elif os.path.isfile(subpath):
                 if not self.check_file(subpath, tag):
                     status = False
             elif not os.path.exists(subpath):
                 log.debug("subpath '%s' does not exist in tag '%s', skipping..." % (subpath, tag))
             else:
                 die("failed to determine if subpath '%s' is file or directory in tag '%s'" % (subpath, tag))
     elif not os.path.exists(path):
         log.debug("path '%s' does not exist in tag '%s', skipping..." % (path, tag))
     else:
         die("failed to determine if path '%s' is file or directory in tag '%s'" % (path, tag))
     return status
 def check_file(self, filename, tag):
     filename = os.path.abspath(filename)
     if os.path.basename(filename) != "Dockerfile":
         return True
     parent = os.path.basename(os.path.dirname(filename))
     tag_base = tag.rsplit("-", 1)[0]
     if parent.lower() != tag_base.lower():
         log.debug(
             "skipping '{0}' as it's parent directory '{1}' doesn't match tag base '{2}'".format(
                 filename, parent, tag_base
             )
         )
         return True
     self.valid_git_tags_msg = "%s => Dockerfile Git Tags OK" % filename
     self.invalid_git_tags_msg = "%s => Dockerfile Git Tags MISMATCH in tag '%s'" % (filename, tag)
     try:
         if not self.check_dockerfile_arg(filename, tag):
             self.failed = True
             # print(self.invalid_git_tags_msg)
             return False
         # now switched to per tag scan this returns way too much redundant output
         # print(self.valid_git_tags_msg)
     except IOError as _:
         die("ERROR: %s" % _)
     return True
 def check_git_branches_dockerfiles(self, target):
     gitroot = find_git_root(target)
     if gitroot is None:
         die('Failed to find git root for target {0}'.format(target))
     log.debug("finding branches for target '{0}'".format(target))
     repo = git.Repo(gitroot)
     branches = [str(x) for x in repo.refs if isinstance(x, git.refs.remote.RemoteReference)]
     branches = [x.split('/')[-1] for x in branches]
     branches = [x for x in branches if x not in ('HEAD', 'master')]
     if self.branch_prefix is not None:
         log.debug('restricting to branches matching branch prefix')
         branches = [x for x in branches if self.branch_prefix.match(x)]
     #if log.isEnabledFor(logging.DEBUG):
     log.debug('\n\nbranches for target %s:\n\n%s\n', target, '\n'.join(branches))
     original_checkout = 'master'
     try:
         try:
             original_checkout = repo.active_branch.name
         except TypeError as _:
             pass
         for branch in branches:
             log.debug("checking branch '%s' Dockerfiles for target '%s'", branch, target)
             self.branches_checked += 1
             try:
                 repo.git.checkout(branch)
             except git.exc.GitCommandError as _:
                 die(_)
             self.check_path(target, branch)
     except Exception as _:  # pylint: disable=broad-except
         traceback.print_exc()
         sys.exit(1)
     finally:
         log.debug("returning to original checkout '%s'", original_checkout)
         repo.git.checkout(original_checkout)
Example #12
    def run(self):
        parquet_file = self.get_opt('parquet')
        avro_dir = self.get_opt('avro_dir')
        # let Spark fail if avro/parquet aren't available
        # can't check the paths exist since we want to remain generically portable
        # to HDFS, local filesystem or any other URI scheme Spark supports
        log.info("Parquet Source: %s" % parquet_file)
        log.info("Avro Destination: %s" % avro_dir)

        conf = SparkConf().setAppName('HS PySpark Parquet => Avro')
        sc = SparkContext(conf=conf) # pylint: disable=invalid-name
        sqlContext = SQLContext(sc)  # pylint: disable=invalid-name
        spark_version = sc.version
        log.info('Spark version detected as %s' % spark_version)

        if not isVersionLax(spark_version):
            die("Spark version couldn't be determined. " + support_msg('pytools'))

        #  pylint: disable=invalid-name
        if isMinVersion(spark_version, 1.4):
            # this doesn't work in Spark <= 1.3 - github docs don't mention the older .method() for writing avro
            df = sqlContext.read.parquet(parquet_file)
            df.write.format('com.databricks.spark.avro').save(avro_dir)
        else:
            die('Spark <= 1.3 is not supported due to avro dependency, sorry! ' + \
                'I may change this on request but prefer people just upgrade')
 def print_table_regions(self):
     print('=' * self.total_width)
     print('{0:{1}}{2}'.format(self.region_header,
                               self.region_width,
                               self.separator),
           end='')
     print('{0:{1}}{2}'.format(self.start_key_header,
                               self.start_key_width,
                               self.separator),
           end='')
     print('{0:{1}}{2}'.format(self.end_key_header,
                               self.end_key_width,
                               self.separator),
           end='')
     print('{0}'.format(self.server_header))
     print('=' * self.total_width)
     try:
         for region in self._regions:
             print('{0:{1}}{2}'.format(self.bytes_to_str(self.shorten_region_name(region['name'])),
                                       self.region_width,
                                       self.separator),
                   end='')
             print('{0:{1}}{2}'.format(self.bytes_to_str(region['start_key']),
                                       self.start_key_width,
                                       self.separator),
                   end='')
             print('{0:{1}}{2}'.format(self.bytes_to_str(region['end_key']),
                                       self.end_key_width,
                                       self.separator),
                   end='')
             print('{0}:{1}'.format(region['server_name'], region['port']))
     except KeyError as _:
         die('error parsing region info: {0}. '.format(_) + support_msg_api())
     print('\nNumber of Regions: {0:d}'.format(len(self._regions)))
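The column alignment above uses str.format()'s nested field-width syntax, where the pad width is itself a format argument rather than a literal. A tiny demonstration:

    # '{0:{1}}' left-pads argument 0 to the width given by argument 1
    print('{0:{1}}{2}'.format('region-1', 20, '|'))  # prints: region-1            |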
 def check_version(self, filename, branch, branch_base, arg_var, found_version, branch_version):
     self.branches_dockerfile_checked.add(branch)
     self.dockerfiles_checked.add(filename)
     if arg_var:
         log.debug("found arg '%s'", arg_var)
         arg_version = "ARG '{0}={1}'".format(arg_var, found_version)
     else:
         arg_version = "'{0}'".format(found_version)
     #log.debug("arg '%s' matches branch base '%s'", argversion.group(1), branch_base)
     log.debug("comparing '%s' contents to version derived from branch '%s' => '%s'",
               filename, branch, branch_version)
     if not isVersion(branch_version.lstrip('jdk').lstrip('jre')):
         die("unrecognized branch version '{0}' for branch_base '{1}'"
             .format(branch_version, branch_base))
     #if branch_version == found_version or branch_version == found_version.split('.', 2)[0]:
     if found_version[0:len(branch_version)] == branch_version:
         log.info("{0} version '{1}' matches {2}".
                  format(self.valid_git_branches_msg, branch_version, arg_version))
     else:
         log.error("{0} version '{1}' vs Dockerfile {2}".
                   format(self.invalid_git_branches_msg, branch_version, arg_version))
         self.dockerfiles_failed += 1
         self.branches_failed.add(branch)
         return False
     return True
Example #15
 def run(self):
     # might have to use compat / transport / protocol args for older versions of HBase or if protocol has been
     # configured to be non-default, see:
     # http://happybase.readthedocs.io/en/stable/api.html#connection
     try:
         log.info('connecting to HBase Thrift Server at {0}:{1}'.format(self.host, self.port))
         self.conn = happybase.Connection(host=self.host, port=self.port, timeout=10 * 1000)  # ms
         tables = self.get_tables()
         # of course there is a minor race condition here between getting the table list, checking and creating
         # not sure if it's solvable, if you have any idea of how to solve it please let me know, even locking
      # would only protect against multiple runs of this script on the same machine...
         if self.table in tables:
             if self.drop_table:
                 log.info("table '%s' already existed but -d / --drop-table was specified, removing table first",
                          self.table)
                 self.conn.delete_table(self.table, disable=True)
                 # wait up to 30 secs for table to be deleted
                 #for _ in range(30):
                 #    if self.table not in self.get_tables():
                 #        break
                 #    log.debug('waiting for table to be deleted before creating new one')
                 #    time.sleep(1)
             elif self.use_existing_table:
                 pass
             else:
                 die("WARNING: table '{0}' already exists, will not send data to a pre-existing table for safety"\
                     .format(self.table) +
                     ". You can choose to either --drop-table or --use-existing-table")
         if not self.use_existing_table:
             self.create_table()
         self.populate_table()
         log.info('finished, closing connection')
         self.conn.close()
     except (socket.timeout, ThriftException, HBaseIOError) as _:
         die('ERROR: {0}'.format(_))
 def check_git_tags_dockerfiles(self, target):
     target = os.path.abspath(target)
     gitroot = find_git_root(target)
     log.debug("finding tags for target '{0}'".format(target))
     repo = git.Repo(gitroot)
     tags = [str(x).split("/")[-1] for x in repo.tags]
     if self.tag_prefix is not None:
         log.debug("restricting to tags matching tag prefix")
         tags = [x for x in tags if self.tag_prefix.match(x)]
     # if log.isEnabledFor(logging.DEBUG):
     log.debug("\n\ntags for target %s:\n\n%s\n", target, "\n".join(tags))
     original_checkout = "master"
     try:
         try:
             original_checkout = repo.active_branch.name
         except TypeError as _:
             pass
         for tag in tags:
             log.debug("checking tag '%s' Dockerfiles for target '%s'", tag, target)
             try:
                 repo.git.checkout(tag)
             except git.exc.GitCommandError as _:
                 die(_)
             self.check_path(target, tag)
     except Exception as _:  # pylint: disable=broad-except
         die(_)
     finally:
         log.debug("returning to original checkout '%s'", original_checkout)
         repo.git.checkout(original_checkout)
Example #17
 def get_tables(self):
     try:
         return self.conn.tables()
     except socket.timeout as _:
         die('ERROR while trying to get table list: {0}'.format(_))
     except thrift.transport.TTransport.TTransportException as _:
         die('ERROR while trying to get table list: {0}'.format(_))
 def run(self):
     if not self.args:
         self.usage('no git directory args given')
     self.origin = self.get_opt('origin')
     args = uniq_list_ordered(self.args)
     self.branch_prefix = self.get_opt('branch_prefix')
     if self.branch_prefix is not None:
         validate_regex(self.branch_prefix, 'branch prefix')
         self.branch_prefix = re.compile(self.branch_prefix)
     for arg in args:
         if not os.path.exists(arg):
             print("'%s' not found" % arg)
             sys.exit(ERRORS['WARNING'])
         if os.path.isfile(arg):
             log_option('file', arg)
         elif os.path.isdir(arg):
             log_option('directory', arg)
         else:
             die("path '%s' could not be determined as either a file or directory" % arg)
     for arg in args:
         self.check_git_branches_upstream(arg)
     if self.status == "OK":
         log.info('SUCCESS - All Git branches are tracking the expected upstream origin branches')
     else:
         log.critical('FAILED')
         sys.exit(ERRORS['CRITICAL'])
 def check_git_branches_upstream(self, target):
     target = os.path.abspath(target)
     gitroot = find_git_root(target)
     if gitroot is None:
         die('Failed to find git root for target {0}'.format(target))
     log.debug("finding branches for target '{0}'".format(target))
     repo = git.Repo(gitroot)
     branches = repo.branches
     if self.branch_prefix is not None:
         log.debug('restricting to branches matching branch prefix')
         branches = [x for x in branches if self.branch_prefix.match(str(x))]
         if not branches:
             log.error("No branches matching '%s' for target '%s'", self.get_opt('branch_prefix'), target)
             self.status = 'NO BRANCHES'
     #if log.isEnabledFor(logging.DEBUG):
     #log.debug('\n\nbranches for target %s:\n\n%s\n', target, '\n'.join(list(branches)))
     for branch in branches:
         expected = '{0}/{1}'.format(self.origin, branch)
         tracking_branch = str(branch.tracking_branch())
         if tracking_branch == expected:
             log.info("OK: repo '{0}' branch '{1}' is tracking '{2}'"
                      .format(gitroot, branch, tracking_branch))
         else:
             self.status = "ERROR"
             log.error("BAD: branch '{0}' is tracking '{1}' (expected '{2}')"
                       .format(branch, tracking_branch, expected))
Example #20
    def process_args(self):
        # this resets DEBUG env var
        #log.setLevel(logging.INFO)
        self.no_args()
        self.host = self.get_opt('host')
        self.port = self.get_opt('port')
        validate_host(self.host)
        validate_port(self.port)

        self.table = self.get_opt('table')
        self.num_rows = self.get_opt('num')
        self.key_length = self.get_opt('key_length')
        self.value_length = self.get_opt('value_length')

        validate_database_tablename(self.table)
        validate_int(self.num_rows, 'num rows', 1, 1000000000)
        validate_int(self.key_length, 'key length', 10, 1000)
        validate_int(self.value_length, 'value length', 1, 1000000)

        self.num_rows = int(self.num_rows)

        self.skew = self.get_opt('skew')
        log_option('skew data', self.skew)
        self.skew_pc = self.get_opt('skew_percentage')
        validate_int(self.skew_pc, 'skew percentage', 0, 100)
        self.skew_pc = int(self.skew_pc)
        self.drop_table = self.get_opt('drop_table')
        self.use_existing_table = self.get_opt('use_existing_table')

        if self.drop_table and self.table != self.default_table_name:
            die("not allowed to use --drop-table if using a table name other than the default table '{0}'"\
                .format(self.default_table_name))
Example #21
 def compact_table(self, table):
     log.info('major compacting table {0}'.format(table))
     try:
         self.conn.compact_table(table, major=True)
     except socket.timeout as _:
         die('ERROR while trying to compact table \'{0}\': {1}'.format(table, _))
     except thrift.transport.TTransport.TTransportException as _:
         die('ERROR while trying to compact table \'{0}\': {1}'.format(table, _))
Example #22
 def json_to_xml(content, filepath=None):
     try:
         _ = json.loads(content)
     except (KeyError, ValueError) as _:
         file_detail = ''
         if filepath is not None:
             file_detail = ' in file \'{0}\''.format(filepath)
         die("Failed to parse JSON{0}: {1}".format(file_detail, _))
     return dicttoxml.dicttoxml(_)
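A quick usage sketch; the exact output shape depends on the dicttoxml version, so this is illustrative only:

    # dicttoxml wraps the parsed dict in a <root> element by default
    xml = json_to_xml('{"host": "localhost", "port": 8080}')
    # e.g. <?xml version="1.0" encoding="UTF-8" ?><root><host ...>localhost</host>...</root>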
Example #23
 def mac_get_arg(args):
     if not args:
         return ''
     if not isList(args):
         die("non-list '{args}' passed to mac_getent_passwd()".format(args=args))
     if len(args) > 1:
         die('only one arg is supported on Mac at this time')
     arg = args[0]
     return arg
Example #24
 def check_parquet(self, filename):
     stderr = subprocess.PIPE
     if self.verbose > 2:
         stderr = None
     if not which("parquet-cat"):
         die("parquet-cat not found in $PATH")
     if subprocess.call(["parquet-cat", filename], stdout=subprocess.PIPE, stderr=stderr, shell=False) == 0:
         print(self.valid_parquet_msg)
     else:
         die(self.invalid_parquet_msg)
Example #25
 def xml_to_json(self, content, filepath=None):
     try:
         _ = xmltodict.parse(content)
     except xml.parsers.expat.ExpatError as _:
         file_detail = ''
         if filepath is not None:
             file_detail = ' in file \'{0}\''.format(filepath)
         die("Failed to parse XML{0}: {1}".format(file_detail, _))
     json_string = json.dumps(_, sort_keys=True, indent=self.indent) #, separators=(',', ': '))
     return json_string
Example #26
 def process_path(self, path):
     if path == '-' or os.path.isfile(path):
         self.process_file(path)
     elif os.path.isdir(path):
         for root, _, files in os.walk(path):
             for filename in files:
                 filepath = os.path.join(root, filename)
                 if self.re_json_suffix.match(filepath):
                     self.process_file(filepath)
     else:
         die("failed to determine if path '%s' is a file or directory" % path)
 def get_clusters(self):
     content = self.get('/clusters')
     clusters = set()
     try:
         _ = json.loads(content)
         for item in _['items']:
             cluster = item['Clusters']['cluster_name']
             clusters.add(cluster)
     except (KeyError, ValueError) as _:
         die('failed to parse cluster name: {0}'.format(_) + support_msg_api())
     return sorted(list(clusters))
 def get_services(self):
     content = self.get('/clusters/{cluster}/services'.format(cluster=self.cluster))
     services = set()
     try:
         _ = json.loads(content)
         for item in _['items']:
             service = item['ServiceInfo']['service_name']
             services.add(service)
     except (KeyError, ValueError) as _:
         die('failed to parse services: {0}'.format(_) + support_msg_api())
     return sorted(list(services))
Example #29
 def check_path(self, path):
     if path == '-' or os.path.isfile(path):
         self.check_file(path)
     elif os.path.isdir(path):
         for item in os.listdir(path):
             subpath = os.path.join(path, item)
             if os.path.isdir(subpath):
                 self.check_path(subpath)
             elif self.re_csv_suffix.match(item):
                 self.check_file(subpath)
     else:
         die("failed to determine if path '%s' is file or directory" % path)
Example #30
 def port_override(self, host):
     port = self.port
     if ':' in host:
         parts = host.split(':')
         if len(parts) == 2:
             port = parts[1]
             if not isPort(port):
                 die('error in host definition, not a valid port number: \'{0}\''.format(host))
         else:
             die('error in host definition, contains more than one colon: \'{0}\''.format(host))
         host = parts[0]
     return (host, port)
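A usage sketch; note the overridden port comes back as a string, since it is taken from str.split():

    (host, port) = self.port_override('namenode:50070')  # -> ('namenode', '50070')
    (host, port) = self.port_override('namenode')        # -> ('namenode', self.port)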
Example #31
 def check_file(self, filename):
     self.filename = filename
     if self.filename == '-':
         self.filename = '<STDIN>'
     self.valid_ini_msg = '%s => INI OK' % self.filename
     self.invalid_ini_msg = '%s => INI INVALID' % self.filename
     if self.filename == '<STDIN>':
         log.debug('ini stdin')
         # TODO: should technically write to temp file to be able to seek(0) for print mode
         self.check_ini(sys.stdin)
     else:
         if self.is_excluded(filename):
             return
         if not self.is_included(filename):
             return
         log.debug('checking %s', self.filename)
         try:
             with open(self.filename) as iostream:
                 self.check_ini(iostream)
         except IOError as _:
             die("ERROR: %s" % _)
Example #32
 def run(self):
     if self.get_opt('pretty'):
         log_option('pretty', True)
         self.indent = 4
     if not self.args:
         self.args.append('-')
     for arg in self.args:
         if arg == '-':
             continue
         if not os.path.exists(arg):
             print("'%s' not found" % arg)
             sys.exit(ERRORS['WARNING'])
         if os.path.isfile(arg):
             log_option('file', arg)
         elif os.path.isdir(arg):
             log_option('directory', arg)
         else:
             die("path '%s' could not be determined as either a file or directory"
                 % arg)
     for arg in self.args:
         self.process_path(arg)
Example #33
 def run(self):
     if not self.args:
         self.args.append('-')
     args = uniq_list_ordered(self.args)
     for arg in args:
         if arg == '-':
             continue
         if not os.path.exists(arg):
             print("'%s' not found" % arg)
             sys.exit(ERRORS['CRITICAL'])
         if os.path.isfile(arg):
             log_option('file', arg)
         elif os.path.isdir(arg):
             log_option('directory', os.path.abspath(arg))
         else:
             die("path '%s' could not be determined as either a file or directory"
                 % arg)
     for arg in args:
         self.check_path(arg)
     if self.failed:
         sys.exit(ERRORS['CRITICAL'])
Example #34
 def cmd(command):
     log.debug('command: %s', command)
     command_binary = command.split()[0]
     if not which(command_binary):
         die("command '{command}' not found in $PATH".format(
             command=command_binary))
     try:
         process = subprocess.Popen(command.split(),
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT)
         (stdout, _) = process.communicate()
         process.wait()
         log.debug('returncode: %s', process.returncode)
         log.debug('output: %s\n', stdout)
         return (stdout, process.returncode)
     except subprocess.CalledProcessError as _:
         log.debug('CalledProcessError Exception!')
         log.debug('returncode: %s', _.returncode)
         log.debug('output: %s\n', _.output)
         return (_.output, _.returncode)
Example #35
 def check_ping(host, count=None, wait=None):
     if count is None:
         count = 1
     if wait is None:
         wait = 3
     if not isInt(count):
         raise UnknownError("passed invalid count '{0}' to check_ping method, must be a valid integer!"\
                            .format(count))
     if not isInt(wait):
         raise UnknownError("passed invalid wait '{0}' to check_ping method, must be a valid integer!"\
                            .format(wait))
     log.info("pinging host '%s' (count=%s, wait=%s)", host, count, wait)
     count_switch = '-c'
     if platform.system().lower() == 'windows':
         count_switch = '-n'
     wait_switch = '-w'
     if platform.system().lower() == 'darwin':
         wait_switch = '-W'
     # causes hang if count / wait are not cast to string
     cmd = ['ping', count_switch, '{0}'.format(count), wait_switch, '{0}'.format(wait), host]
     log.debug('cmd: %s', ' '.join(cmd))
     #log.debug('args: %s', cmd)
     try:
         process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         #log.debug('communicating')
         (stdout, stderr) = process.communicate()
         #log.debug('waiting for child process')
         process.wait()
         exitcode = process.returncode
         log.debug('stdout: %s', stdout)
         log.debug('stderr: %s', stderr)
         log.debug('exitcode: %s', exitcode)
         if exitcode == 0:
             log.info("host '%s' responded to ping", host)
             return host
     except subprocess.CalledProcessError as _:
         log.warn('ping failed: %s', _.output)
     except OSError as _:
         die('error calling ping: {0}'.format(_))
     return None
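Since check_ping() returns the host on success and None on failure, it composes naturally with filters; a usage sketch (the candidates list is hypothetical):

    # ping each candidate host once with a short wait, keep only the responsive ones
    live_hosts = [host for host in candidates if check_ping(host, count=1, wait=1)]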
Example #36
 def run(self):
     args = self.process_args()
     for arg in args:
         if not os.path.exists(arg):
             _ = "'%s' not found" % arg
             if self.skip_errors:
                 print(_)
                 self.failed = True
             else:
                 die(_)
         if os.path.isfile(arg):
             log_option('file', arg)
         elif os.path.isdir(arg):
             log_option('directory', arg)
         else:
             die("path '%s' could not be determined as either a file or directory"
                 % arg)
     for arg in args:
         try:
             self.check_path(arg)
         except OSError as _:
             if self.skip_errors:
                 print(_)
                 self.failed = True
             else:
                 die(_)
     if self.failed:
         sys.exit(2)
Example #37
 def run(self):
     tables = self.get_tables()
     if not tables:
         die('No Tables Found')
     if self.get_opt('list_tables'):
         print('Tables:\n\n' + '\n'.join(tables))
         sys.exit(3)
     tables_to_flush = set()
     if self.table_regex:
         log.info('filtering tables based on regex')
         for table in sorted(list(tables)):
             if self.table_regex.search(table):
                 tables_to_flush.add(table)
     else:
         tables_to_flush = sorted(list(tables))
     if log.isEnabledFor(logging.INFO):
         log.info('Flushing tables:\n\n%s\n', '\n'.join(tables_to_flush))
     flush_commands = '\n'.join(["flush '{0}'".format(table) for table in tables_to_flush])
     try:
         # by having stdout and stderr go to the same place more likely the output will be in a sane order
         process = subprocess.Popen(['hbase', 'shell'], stdin=PIPE, stdout=PIPE, stderr=subprocess.STDOUT)
         (stdout, _) = process.communicate(input=flush_commands)
         process.wait()
         if process.returncode != 0:
             print('ERROR:', end='')
             die(stdout)
         print(stdout)
     except OSError as _:
         die("OSError running hbase shell to flush tables: {0}".format(_))
     except subprocess.CalledProcessError as _:
         print('Failed to get tables using HBase shell:\n')
         print(_.output)
         sys.exit(_.returncode)
 def check_git_branches_dockerfiles(self, target):
     gitroot = find_git_root(target)
     if gitroot is None:
         die('Failed to find git root for target {0}'.format(target))
     log.debug("finding branches for target '{0}'".format(target))
     repo = git.Repo(gitroot)
     #branches = [str(x) for x in repo.refs if isinstance(x, git.refs.remote.RemoteReference)]
     branches = [str(x) for x in repo.refs if isinstance(x, git.Head)]
     branches = [x.split('/')[-1] for x in branches]
     branches = set(branches)
     branches = [x for x in branches if x not in ('HEAD', 'master')]
     self.branches = branches
     if self.branch_prefix is not None:
         log.debug('restricting to branches matching branch prefix')
         branches = [x for x in branches if self.branch_prefix.match(x)]
     self.selected_branches = branches
     #if log.isEnabledFor(logging.DEBUG):
     log.debug('\n\nbranches for target %s:\n\n%s\n', target,
               '\n'.join(branches))
     original_checkout = 'master'
     try:
         try:
             original_checkout = repo.active_branch.name
         except TypeError as _:
             pass
         for branch in branches:
             log.debug("checking branch '%s' Dockerfiles for target '%s'",
                       branch, target)
             self.branches_checked += 1
             try:
                 repo.git.checkout(branch)
             except git.exc.GitCommandError as _:
                 die(_)
             self.check_path(target, branch)
     except Exception as _:  # pylint: disable=broad-except
         traceback.print_exc()
         sys.exit(1)
     finally:
         log.debug("returning to original checkout '%s'", original_checkout)
         repo.git.checkout(original_checkout)
Example #39
 def check_selenium(self, browser):
     log.info("Connecting to '%s' for browser '%s'", self.hub_url, browser)
     driver = webdriver.Remote(command_executor=self.hub_url,
                               desired_capabilities=getattr(
                                   DesiredCapabilities, browser))
     log.info("Checking url '%s'", self.url)
     driver.get(self.url)
     content = driver.page_source
     title = driver.title
     driver.quit()
     if self.expected_regex:
         log.info("Checking url content matches regex")
         if not self.expected_regex.search(content):
             die('ERROR: Page source content failed regex search')
     elif self.expected_content:
         log.info("Checking url content matches '%s'",
                  self.expected_content)
         if self.expected_content not in content:
             die('ERROR: Page source content failed content match')
     # not really recommended but in this case we cannot predict
     # what to expect on a random url if not specified by --content/--regex (provided in the default test case)
     #
     # https://www.selenium.dev/documentation/en/worst_practices/http_response_codes/
     elif '404' in title:
         die('ERROR: Page title contains a 404 / error ' +
             '(if this is expected, specify --content / --regex to check instead): {}'
             .format(title))
     log.info("Succeeded for browser '%s' against url '%s'", browser,
              self.url)
    def print_results(self, term, limit=None):
        data = self.search(term, limit)
        results = {}
        longest_name = 8
        try:
            # collect in dict to order by stars like normal docker search command
            for item in data['results']:
                star = item['star_count']
                name = item['name']
                if len(name) > longest_name:
                    longest_name = len(name)
                if not isInt(star):
                    die("star count '{0}' for repo '{1}' is not an integer! {2}"
                        .format(star, name, support_msg_api()))
                results[star] = results.get(star, {})
                results[star][name] = results[star].get(name, {})
                result = {}
                result['description'] = item['description']
                result['official'] = '[OK]' if item['is_official'] else ''
                # docker search doesn't output this so neither will I
                #result['trusted'] = result['is_trusted']
                result['automated'] = '[OK]' if item['is_automated'] else ''
                results[star][name] = result
            # mimicking output spacing from the 'docker search' command
            if not self.quiet:
                print('{0:{5}s}   {1:45s}   {2:7s}   {3:8s}   {4:10s}'.format(
                    'NAME', 'DESCRIPTION', 'STARS', 'OFFICIAL', 'AUTOMATED',
                    longest_name))
        except KeyError as _:
            die('failed to parse results fields from data returned by DockerHub '
                + '(format may have changed?): {0}'.format(_))
        except IOError as _:
            if str(_) == '[Errno 32] Broken pipe':
                pass
            else:
                raise

        def truncate(mystr, length):
            if len(mystr) > length:
                mystr = mystr[0:length - 3] + '...'
            return mystr

        for star in reversed(sorted(results)):
            for name in sorted(results[star]):
                if self.quiet:
                    print(name.encode('utf-8'))
                else:
                    desc = truncate(results[star][name]['description'], 45)
                    print('{0:{5}s}   {1:45s}   {2:<7d}   {3:8s}   {4:10s}'.
                          format(name.encode('utf-8'), desc.encode('utf-8'),
                                 star, results[star][name]['official'],
                                 results[star][name]['automated'],
                                 longest_name))
        if self.verbose and not self.quiet:
            try:
                print('\nResults Shown: {0}\nTotal Results: {1}'.format(
                    len(data['results']), data['num_results']))
            except KeyError as _:
                die('failed to parse get total results count from data returned by DockerHub '
                    + '(format may have changed?): {0}'.format(_))
Example #41
 def get_tables(self):
     log.info('getting table list')
     try:
         process = subprocess.Popen(['hbase', 'shell'], stdin=PIPE, stdout=PIPE, stderr=subprocess.STDOUT)
         (stdout, _) = process.communicate('list')
         process.wait()
         if process.returncode != 0:
             print('ERROR:', end='')
             die(stdout)
         lines = stdout.split('\n')
         lineno = 1
         for line in lines:
             if self.table_list_header_regex.search(line):
                 break
             lineno += 1
         if lineno > len(lines):
             die("Failed to parse table list output (couldn't find the starting line TABLE)")
         tables = set()
         for line in lines[lineno:]:
             if self.table_list_end_regex.search(line):
                 break
             line = line.strip()
             if not line:
                 continue
             tables.add(line)
         return tables
     except OSError as _:
         die("OSError running hbase shell to list tables: {0}".format(_))
     except subprocess.CalledProcessError as _:
         print('Failed to get tables using HBase shell:\n')
         print(_.output)
         sys.exit(_.returncode)
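The two regexes that bracket the table names are defined elsewhere. Plausible definitions, based on the 'TABLE' header line and trailing 'N row(s)' summary that the hbase shell 'list' command prints (an assumption, not confirmed by the snippet):

    import re

    # hypothetical patterns framing hbase shell 'list' output
    table_list_header_regex = re.compile(r'^TABLE$')
    table_list_end_regex = re.compile(r'^\d+ row\(s\)')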
Example #42
 def run(self):
     if not self.args:
         self.usage('no Dockerfile / directory args given')
     args = uniq_list_ordered(self.args)
     self.branch_prefix = self.get_opt('branch_prefix')
     if self.branch_prefix is not None:
         validate_regex(self.branch_prefix, 'branch prefix')
         self.branch_prefix = re.compile(self.branch_prefix)
     for arg in args:
         if not os.path.exists(arg):
             print("'%s' not found" % arg)
             sys.exit(ERRORS['WARNING'])
         if os.path.isfile(arg):
             log_option('file', arg)
         elif os.path.isdir(arg):
             log_option('directory', arg)
         else:
             die("path '%s' could not be determined as either a file or directory"
                 % arg)
     for arg in args:
         self.check_git_branches_dockerfiles(arg)
     branches_skipped = len(self.branches_skipped)
     if branches_skipped > 0:
         log.warn(
             '{0} branches skipped for not matching expected naming format'.
             format(branches_skipped))
     log.info('{0} Dockerfiles checked across {1} branches'.format(
         len(self.dockerfiles_checked), self.branches_checked))
     branches_failed = len(self.branches_failed)
     _ = '{0} Dockerfiles failed validation across {1} branches'.format(
         self.dockerfiles_failed, branches_failed)
     if branches_failed > 0:
         log.error(_)
     else:
         log.info(_)
     if self.failed:
         log.error('Dockerfile validation FAILED')
         sys.exit(ERRORS['CRITICAL'])
     log.info('Dockerfile validation SUCCEEDED')
 def check_file(self, filename):
     if filename == '-':
         filename = '<STDIN>'
     self.valid_parquet_msg = '%s => Parquet OK' % filename
     self.invalid_parquet_msg = '%s => Parquet INVALID' % filename
     if filename == '<STDIN>':
         try:
             tmp = tempfile.NamedTemporaryFile()
             log.debug('created tmp file from stdin: %s', tmp.name)
             tmp.write(sys.stdin.read())
             tmp.seek(0)
             self.check_parquet(tmp.name)
             tmp.close()
         except IOError as _:
             die("ERROR: %s" % _)
     else:
         if self.is_excluded(filename):
             return
         try:
             self.check_parquet(filename)
         except IOError as _:
             die("ERROR: %s" % _)
Example #44
 def mac_getent_group_name(self, group):
     log.info('mac_getent_group_name(%s)', group)
     command = 'dscl . -read /Groups/{group}'.format(group=group)
     (output, returncode) = self.cmd(command)
     gid = password = name = members = ''
     #log.info('parsing output for group conversion')
     output = output.split('\n')
     for index, line in enumerate(output):
         tokens = line.split()
         if len(tokens) < 1:
             continue
         field = tokens[0]
         if len(tokens) < 2:
             value = ''
         else:
             value = tokens[1]
         if field == 'PrimaryGroupID:':
             gid = value
          elif field == 'Password:':
              password = 'x'
         elif field == 'RealName:':
             name = value
             if not value and len(output) > index + 1 and output[
                     index + 1].startswith(' '):
                 name = output[index + 1].strip()
         elif not name and field == 'RecordName:':
             name = value
         elif field == 'GroupMembership:':
             members = ','.join(tokens[1:])
     if not gid:
         return ('', returncode)
     getent_record = '{gid}:{password}:{name}:{members}'.format\
                     (gid=gid, password=password, name=name, members=members)
     if not isInt(gid, allow_negative=True):
         die("parsing error: GID '{gid}' is not numeric in record {record}!"
             .format(gid=gid, record=getent_record))
     return (getent_record, returncode)
Example #45
 def run(self):
     # might have to use compat / transport / protocol args for older versions of HBase or if protocol has been
     # configured to be non-default, see:
     # http://happybase.readthedocs.io/en/stable/api.html#connection
     try:
         log.info('connecting to HBase Thrift Server at {0}:{1}'.format(
             self.host, self.port))
         self.conn = happybase.Connection(host=self.host,
                                          port=self.port,
                                          timeout=10 * 1000)  # ms
         tables = self.get_tables()
         # of course there is a minor race condition here between getting the table list, checking and creating
         # not sure if it's solvable, if you have any idea of how to solve it please let me know, even locking
          # would only protect against multiple runs of this script on the same machine...
         if self.table in tables:
             if self.drop_table:
                 log.info(
                     "table '%s' already existed but -d / --drop-table was specified, removing table first",
                     self.table)
                 self.conn.delete_table(self.table, disable=True)
                 # wait up to 30 secs for table to be deleted
                 #for _ in range(30):
                 #    if self.table not in self.get_tables():
                 #        break
                 #    log.debug('waiting for table to be deleted before creating new one')
                 #    time.sleep(1)
             elif self.use_existing_table:
                 pass
             else:
                 die("WARNING: table '{0}' already exists, will not send data to a pre-existing table for safety"\
                     .format(self.table) +
                     ". You can choose to either --drop-table or --use-existing-table")
         if not self.use_existing_table:
             self.create_table()
         self.populate_table()
         log.info('finished, closing connection')
         self.conn.close()
     except (socket.timeout, ThriftException, HBaseIOError) as _:
         die('ERROR: {0}'.format(_))
Example #46
 def check_file(self, filename, tag):
     filename = os.path.abspath(filename)
     if os.path.basename(filename) != 'Dockerfile':
         return True
     parent = os.path.basename(os.path.dirname(filename))
     tag_base = tag.rsplit('-', 1)[0]
     if parent.lower() != tag_base.lower():
         log.debug("skipping '{0}' as it's parent directory '{1}' doesn't match tag base '{2}'".
                   format(filename, parent, tag_base))
         return True
     self.valid_git_tags_msg = '%s => Dockerfile Git Tags OK' % filename
     self.invalid_git_tags_msg = "%s => Dockerfile Git Tags MISMATCH in tag '%s'" % (filename, tag)
     try:
         if not self.check_dockerfile_arg(filename, tag):
             self.failed = True
             #print(self.invalid_git_tags_msg)
             return False
         # now switched to per tag scan this returns way too much redundant output
         #print(self.valid_git_tags_msg)
     except IOError as _:
         die("ERROR: %s" % _)
     return True
 def cancel_service_checks(self):
     log.info('cancelling all requests matching service check context')
     request_ids = self.get_request_ids()
     #re_context = re.compile(r'.+ Service Check \(batch \d+ of \d+\)', re.I)
     cancel_payload = '{"Requests":{"request_status":"ABORTED","abort_reason":"Aborted by user"}}'
     for request_id in request_ids:
         content = self.get(
             '/clusters/{cluster}/requests/{request_id}'.format(
                 cluster=self.cluster, request_id=request_id))
         try:
             _ = json.loads(content)
             request_context = _['Requests']['request_context']
             if 'Service Check' in request_context:
                 log.info('cancelling request_id %s (%s)', request_id,
                          request_context)
                 self.put(
                     '/clusters/{cluster}/requests/{request_id}'.format(
                         cluster=self.cluster, request_id=request_id),
                     data=cancel_payload)
         except (KeyError, ValueError) as _:
             die('failed to parse response for request_id {0}. '.format(
                 request_id) + support_msg_api())
 def request_service_checks(self, services):
     log.debug('requesting service checks for services: %s', services)
     if not isList(services):
         code_error('non-list passed to request_service_checks')
     url_suffix = '/clusters/{cluster}/request_schedules'.format(
         cluster=self.cluster)
     payload = self.gen_payload(services)
     log.info('sending batch schedule check request for services: ' +
              ', '.join(services))
     content = self.post(url_suffix=url_suffix, data=payload)
     try:
         _ = json.loads(content)
         request_schedule_id = _['resources'][0]['RequestSchedule']['id']
         log.info('RequestSchedule %s submitted', request_schedule_id)
         href = _['resources'][0]['href']
         assert href == self.url_base.rstrip('/') + '/clusters/{0}/request_schedules/{1}'\
                        .format(self.cluster, request_schedule_id)
         if self.watch:
             self.watch_scheduled_request(request_schedule_id)
     except (KeyError, ValueError) as _:
         die('parsing schedule request response failed: ' + str(_) + '. ' +
             support_msg_api())
Example #49
 def check_file(self, filename):
     if filename == '-':
         filename = '<STDIN>'
     self.valid_ldif_msg = '%s => LDIF OK' % filename
     self.invalid_ldif_msg = '%s => LDIF INVALID' % filename
     if filename == '<STDIN>':
         log.debug('checking <STDIN>')
         #self.check_ldif(sys.stdin.read())
         self.check_ldif(sys.stdin)
     else:
         if self.is_excluded(filename):
             return
         try:
             log.debug("checking '%s'", filename)
             with open(filename, 'rb') as iostream:
                 #content = iostream.read()
                 #self.check_ldif(content)
                 self.check_ldif(iostream)
         except IOError as _:
             die("ERROR: %s" % _)
     if self.failed:
         sys.exit(2)
Example #50
 def process_file(self, filename, file_handle):
     for line in file_handle:
         # log.debug(line)
         match = self.re_line.match(line)
         if not match:
             err_msg = "ERROR in file '{0}' on line: {1}".format(
                 filename, line)
             if not self.skip_errors:
                 die(err_msg)
             printerr()
             log.warn(err_msg)
             continue
         metric = match.group(1)
         timestamp = match.group(2)
         # don't have a need for this right now
         # value = match.group(3)
         tags = match.group(4)
         key = metric
         if self.include_timestamps:
             timestamp = int(timestamp)
             # remove millis
             if len(str(timestamp)) >= 15:
                 timestamp = round(timestamp / 1000)
             hour = time.strftime('%Y-%m-%d %H:00', time.gmtime(timestamp))
             key += ' ' + hour
         for tag in sorted(tags.split()):
             key += ' ' + tag.strip()
         if self.prefix_length is None:
             prefix = key
         else:
             prefix = key[0:min(self.prefix_length, len(key))]
         # prefix = self.bytes_to_str(prefix)
         if not self.keys.get(prefix):
             self.keys[prefix] = {'count': 0}
         self.keys[prefix]['count'] += 1
         self.total_keys += 1
         if self.verbose < 2 and self.total_keys % 10000 == 0:
             print('.', file=sys.stderr, end='')
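self.re_line is compiled elsewhere; given the four groups extracted above (metric, timestamp, value, tags), a plausible pattern for OpenTSDB-style 'metric timestamp value tag=value ...' export lines would be (hypothetical):

    import re

    # hypothetical line format: metric name, epoch timestamp, value, then space-separated tags
    re_line = re.compile(r'^(\S+)\s+(\d+)\s+(\S+)\s+(.+)$')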
Example #51
 def check_path(self, path, branch):
     status = True
     (branch_base, _) = self.branch_version(branch)
     branch_normalized_name = self.normalize_name(branch_base)
     log.debug('branch normalized name: %s', branch_normalized_name)
     if os.path.isfile(path):
         return self.check_file(path, branch)
     elif os.path.isdir(path):
         if os.path.basename(path) == '.git':
             return True
         for item in os.listdir(path):
             subpath = os.path.join(path, item)
             if os.path.islink(subpath):
                 subpath = os.path.realpath(subpath)
             if os.path.isdir(subpath):
                 subpath_base = os.path.basename(subpath)
                 #log.debug('subpath_base = %s', subpath_base)
                 if self.normalize_name(
                         subpath_base) == branch_normalized_name:
                     if not self.check_path(subpath, branch):
                         status = False
             elif os.path.isfile(subpath):
                 if not self.check_file(subpath, branch):
                     status = False
             elif not os.path.exists(subpath):
                 log.debug(
                     "subpath '%s' does not exist in branch '%s', skipping..."
                     % (subpath, branch))
             else:
                 die("failed to determine if subpath '%s' is file or directory in branch '%s'"
                     % (subpath, branch))
     elif not os.path.exists(path):
         log.debug("path '%s' does not exist in branch '%s', skipping..." %
                   (path, branch))
     else:
         die("failed to determine if path '%s' is file or directory in branch '%s'"
             % (path, branch))
     return status
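branch_version() and normalize_name() are defined elsewhere in the class; plausible sketches of what would satisfy the calls above (assumptions, not the original implementations):

import re

def branch_version(branch):
    # split a branch like 'centos-7.3' into a base name and a version suffix
    match = re.match(r'^(.*?)-?(\d[\d.]*)?$', branch)
    return (match.group(1), match.group(2))

def normalize_name(name):
    # fold case and separators so 'CentOS_7' and 'centos-7' compare equal
    return re.sub(r'[^a-z0-9]', '', name.lower())

print(branch_version('centos-7.3'))  # ('centos', '7.3')
print(normalize_name('CentOS-7.3'))  # 'centos73'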
Example #52
 def check(self, filename):
     if filename == '-':
         filename = '<STDIN>'
     self.filename = filename
     single_quotes = '(found single quotes not double quotes)'
     self.valid_json_msg_single_quotes = '{0} {1}'.format(
         self.valid_json_msg, single_quotes)
     self.invalid_json_msg_single_quotes = '{0} {1}'.format(
         self.invalid_json_msg, single_quotes)
     if filename == '<STDIN>':
         self.iostream = sys.stdin
         if self.get_opt('multi_record'):
             if not self.check_multirecord_json():
                 self.failed = True
                 self.msg = self.invalid_json_msg
                 if not self.passthru:
                     die(self.msg)
         else:
             self.check_json(sys.stdin.read())
     else:
         self.check_file(filename)
     if self.failed:
         sys.exit(2)
Example #53
 @staticmethod
 def parse_scheduled_request(content):
     try:
         _ = json.loads(content)
         if _['RequestSchedule']['last_execution_status'] == 'COMPLETED':
             log.info('COMPLETED')
             return 'COMPLETED'
         for item in _['RequestSchedule']['batch']['batch_requests']:
             request_status = 'NO STATUS YET'
             if 'request_status' in item:
                 request_status = item['request_status']
             if request_status == 'COMPLETED':
                 continue
             request_body = item['request_body']
             request_body_dict = json.loads(request_body)
             command = request_body_dict['RequestInfo']['command']
             context = request_body_dict['RequestInfo']['context']
             log.info('{request_status}: {command}: {context}'.format(request_status=request_status,
                                                                      command=command,
                                                                      context=context))
             if request_status != 'ABORTED':
                 return 'IN_PROGRESS'
     except (KeyError, ValueError) as _:
         die('parsing schedule request status failed: ' + str(_) + '. ' + support_msg_api())
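For context, a sketch of the JSON shape this status parser walks. The field names are the ones accessed above and the values are invented; note that request_body is itself a JSON-encoded string, hence the second json.loads():

import json

sample = {
    'RequestSchedule': {
        'last_execution_status': 'SCHEDULED',
        'batch': {
            'batch_requests': [{
                'request_status': 'IN_PROGRESS',
                # request_body arrives as a JSON-encoded string
                'request_body': json.dumps({
                    'RequestInfo': {
                        'command': 'HDFS_SERVICE_CHECK',
                        'context': 'HDFS Service Check (batch 1 of 2)',
                    }
                }),
            }]
        }
    }
}

item = sample['RequestSchedule']['batch']['batch_requests'][0]
request_body_dict = json.loads(item['request_body'])
print(item['request_status'], request_body_dict['RequestInfo']['command'])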
Example #54
 def populate_row_counts(self, table_conn):
     if not self.conn.is_table_enabled(self.table):
         die("table '{0}' is not enabled".format(self.table))
     log.info('getting row counts (this may take a long time)')
     #rows = table_conn.scan(columns=[])
     rows = table_conn.scan()  # scan(columns=[]) doesn't return rows unless a column family is given
     if self.verbose < 2:
         print('progress dots (one per 10,000 rows): ',
               file=sys.stderr,
               end='')
     for row in rows:
         #log.debug(row)
         key = row[0]
         prefix = key[0:min(self.prefix_length, len(key))]
         prefix = self.bytes_to_str(prefix)
         if not self.rows.get(prefix):
             self.rows[prefix] = {'row_count': 0}
         self.rows[prefix]['row_count'] += 1
         self.total_rows += 1
         if self.verbose < 2 and self.total_rows % 10000 == 0:
             print('.', file=sys.stderr, end='')
     if self.verbose < 2:
         print(file=sys.stderr)
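happybase's Table.scan() yields (row_key, data) tuples, which is why row[0] is taken as the key. bytes_to_str() is presumably a small Python 2/3 compatibility helper along these lines (an assumption):

def bytes_to_str(arg):
    # row keys come back as bytes under Python 3 - decode them, pass str through
    if isinstance(arg, bytes):
        return arg.decode('utf-8')
    return arg

print(bytes_to_str(b'row0001'))  # 'row0001'
print(bytes_to_str('row0001'))   # 'row0001'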
Example #55
 def run(self):
     # might have to use compat / transport / protocol args for older versions of HBase or if protocol has been
     # configured to be non-default, see:
     # http://happybase.readthedocs.io/en/stable/api.html#connection
     try:
         log.info('connecting to HBase Thrift Server at %s:%s', self.host, self.port)
         self.conn = happybase.Connection(host=self.host, port=self.port, timeout=10 * 1000)  # ms
         tables = self.get_tables()
         if self.get_opt('list_tables'):
             print('Tables:\n\n' + '\n'.join(tables))
             sys.exit(3)
         if self.table not in tables:
             die("HBase table '{0}' does not exist!".format(self.table))
         table_conn = self.conn.table(self.table)
         self.populate_row_counts(table_conn)
         self.calculate_row_count_widths()
         self.calculate_row_percentages()
         self.print_table_row_prefix_counts()
         self.print_summary()
         log.info('finished, closing connection')
         self.conn.close()
     except (socket.timeout, ThriftException, HBaseIOError) as _:
         die('ERROR: {0}'.format(_))
Example #56
 def check_file(self, filename):
     self.filename = filename
     if self.filename == '-':
         self.filename = '<STDIN>'
     self.valid_toml_msg = '%s => TOML OK' % self.filename
     self.invalid_toml_msg = '%s => TOML INVALID' % self.filename
     if self.filename == '<STDIN>':
         log.debug('toml stdin')
         self.check_toml(sys.stdin)
     else:
         if self.is_excluded(filename):
             return
         if not self.is_included(filename):
             return
         log.debug('checking %s', self.filename)
         try:
             if self.check_toml(filename):
                 print(self.valid_toml_msg)
             else:
                 print(self.invalid_toml_msg)
                 sys.exit(2)
         except IOError as _:
             die("ERROR: %s" % _)
Example #57
 def local_main(self, table_conn):
     self.no_region_col = self.get_opt('no_region_name')
     self.sort = self.get_opt('sort')
     self.sort_desc = self.get_opt('desc')
     if self.sort is not None:
         self.sort = self.sort.lower()
         if self.sort not in self.valid_sorts:
             self.usage('invalid --sort option given, must be one of: {0}'.format(', '.join(self.valid_sorts)))
     log_option('no region name', self.no_region_col)
     log_option('sort', self.sort)
     if self.no_region_col:
         self.total_width -= self.region_width
     num_regions = len(self._regions)
     # sanity check and protect against division by zero in summary stats
     if num_regions < 1:
         die('number of regions detected = {0:d} (< 1)'.format(num_regions))
     self.populate_region_metadata()
     self.calculate_widths()
     self.populate_row_counts(table_conn)
     self.calculate_row_count_widths()
     self.calculate_row_percentages()
     self.print_table_region_row_counts()
     self.print_summary()
Example #58
 def query(self, url):
     log.debug('GET %s' % url)
     try:
         verify = True
         # workaround for Travis CI and older pythons - we're not exchanging secret data so this is ok
         #if os.getenv('TRAVIS'):
         #    verify = False
         if os.getenv('SSL_NOVERIFY') == '1':
             log.warn('disabling SSL verification')
             verify = False
         auth = None
         if self.user and self.password:
             auth = (self.user, self.password)
             log.debug('setting basic HTTP authentication using username: %s, password: <omitted>', self.user)
         req = requests.get(url, auth=auth, verify=verify)
     except requests.exceptions.RequestException as _:
         die(_)
     log.debug("response: %s %s", req.status_code, req.reason)
     log.debug("content:\n%s\n%s\n%s", '='*80, req.content.strip(), '='*80)
     if req.status_code != 200:
         die("%s %s" % (req.status_code, req.reason))
     if not isJson(req.content):
         die('invalid non-JSON response from Docker Registry!')
     if log.isEnabledFor(logging.DEBUG):
         print(jsonpp(req.content))
         print('='*80)
     tag_list = []
     try:
         json_data = json.loads(req.content)
         # DockerHub returns like this
         if 'results' in json_data:
             tag_list = [result['name'] for result in json_data['results']]
         # Docker Registry returns like this
         elif 'tags' in json_data:
             tag_list = json_data['tags']
         else:
             raise UnknownError('failed to parse response, found neither results nor tags fields. {0}'\
                                .format(support_msg_api()))
         # recursing here could overflow the stack if there were a very large number of pages,
         # so rather than tail recursion (less functional-programming 'cool'), return the next
         # page URL and let the caller iterate with a plain while loop instead
         next_page_url = None
         if 'next' in json_data and json_data['next']:
         #    tag_list += self.query(json_data['next'])
             next_page_url = json_data['next']
         return (tag_list, next_page_url)
     except KeyError as _:
         die('failed to parse output from Docker Registry (format may have changed?): {0}'.format(_))
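Since query() returns (tag_list, next_page_url) rather than recursing, the caller is expected to drive pagination itself. A sketch of such a loop - a hypothetical caller, assuming a registry object exposing query():

def get_all_tags(registry, first_url):
    # follow 'next' links iteratively rather than recursively,
    # avoiding any chance of hitting the recursion limit on many pages
    tags = []
    url = first_url
    while url:
        (tag_list, url) = registry.query(url)
        tags += tag_list
    return tags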
Example #59
 def connection(self, host, port, user, password, ssl=False, **kwargs):
     # must set X-Requested-By in newer versions of Ambari
     self.x_requested_by = user
     if user == 'admin':
         self.x_requested_by = os.getenv('USER', user)
     #log.info("contacting Ambari as '%s'" % self.user)
     if not isHost(host) or not isPort(port) or not isUser(
             user) or not password:
         raise InvalidOptionException(
             'invalid options passed to AmbariBlueprint()')
     proto = 'http'  # pylint: disable=unused-variable,possibly-unused-variable
     if ssl:
         proto = 'https'
     self.host = host
     self.port = port
     self.user = user
     self.password = password
     # if kwargs.has_key('strip_config') and kwargs['strip_config']:
     if 'strip_config' in kwargs and kwargs['strip_config']:
         self.strip_config = True
     self.url_base = '%(proto)s://%(host)s:%(port)s/api/v1' % locals()
     if 'dir' in kwargs and kwargs['dir']:
         self.blueprint_dir = kwargs['dir']
     if not isDirname(self.blueprint_dir):
         qquit('UNKNOWN', 'invalid dir arg passed to AmbariBlueprintTool')
     try:
         if not self.blueprint_dir or not os.path.exists(
                 self.blueprint_dir):
             log.info("creating blueprint data dir '%s'" %
                      self.blueprint_dir)
             os.mkdir(self.blueprint_dir)
         if not os.path.isdir(self.blueprint_dir):
             raise IOError(
                 "blueprint dir '%s'already taken and is not a directory" %
                 self.blueprint_dir)
     except IOError as _:
         die("'failed to create dir '%s': %s" % (self.blueprint_dir, _))
Example #60
 def check_dockerfile_arg(self, filename, tag):
     log.debug('check_dockerfile_arg({0}, {1})'.format(filename, tag))
     tag_base = str(tag).replace('-dev', '')
     (tag_base, tag_version) = tag_base.rsplit('-', 1)
     log.debug('tag_base = {0}'.format(tag_base))
     log.debug('tag_version = {0}'.format(tag_version))
     with open(filename) as filehandle:
         for line in filehandle:
             #log.debug(line.strip())
             argversion = self.arg_regex.match(line.strip())
             if argversion:
                 log.debug("found arg '%s'", argversion.group(0))
                 log.debug("checking arg group 1 '%s' == tag_base '%s'",
                           argversion.group(1), tag_base)
                 if argversion.group(1).lower() == tag_base.lower().replace(
                         '-', '_'):
                     log.debug("arg '%s'  matches tag base '%s'",
                               argversion.group(1), tag_base)
                     log.debug(
                         "comparing '%s' contents to version derived from tag '%s' => '%s'",
                         filename, tag, tag_version)
                     if not isVersion(tag_version):
                         die("unrecognized tag version '{0}' for tag_base '{1}'"
                             .format(tag_version, tag_base))
                     found_version = argversion.group(2)
                     #if tag_version == found_version or tag_version == found_version.split('.', 1)[0]:
                     if found_version[0:len(tag_version)] == tag_version:
                         log.info(
                             "{0} (tag version '{1}' matches arg version '{2}')"
                             .format(self.valid_git_tags_msg, tag_version,
                                     found_version))
                         return True
                     log.error('{0} ({1} tag vs {2} Dockerfile ARG)'.format(
                         self.invalid_git_tags_msg, tag_version,
                         found_version))
                     return False
     return True
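self.arg_regex is defined elsewhere; given that group(1) is compared against the tag base (with '-' mapped to '_') and group(2) against the version derived from the tag, it plausibly matches Dockerfile lines like 'ARG CENTOS_VERSION=7.3'. A hypothetical version, for illustration only:

import re

# hypothetical: capture name and value from an ARG <NAME>_VERSION=<value> line
arg_regex = re.compile(r'^\s*ARG\s+(\w+)_VERSION=([\w.]+)\s*$')

tag = 'centos-7.3-dev'
tag_base, tag_version = str(tag).replace('-dev', '').rsplit('-', 1)

match = arg_regex.match('ARG CENTOS_VERSION=7.3')
if match:
    print(match.group(1).lower() == tag_base.lower().replace('-', '_'))  # True
    print(match.group(2)[0:len(tag_version)] == tag_version)             # True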