def check_version(self, filename, branch, branch_base, arg_var, found_version, branch_version):
    """Compare the version found in a Dockerfile against the version derived from its git branch.

    Records the branch/file as checked, then does a prefix comparison so a branch
    version '1.2' accepts a Dockerfile version '1.2.3'. Returns True on match,
    False on mismatch (recording the failure).
    """
    self.branches_dockerfile_checked.add(branch)
    self.dockerfiles_checked.add(filename)
    if arg_var:
        log.debug("found arg '%s'", arg_var)
        arg_version = "ARG '{0}={1}'".format(arg_var, found_version)
    else:
        arg_version = "'{0}'".format(found_version)
    log.debug("comparing '%s' contents to version derived from branch '%s' => '%s'",
              filename, branch, branch_version)
    # NOTE(review): lstrip() strips *characters*, not a literal prefix - used here as a
    # cheap way to drop a leading jdk/jre marker before the version sanity check
    if not isVersion(branch_version.lstrip('jdk').lstrip('jre')):
        die("unrecognized branch version '{0}' for branch_base '{1}'"
            .format(branch_version, branch_base))
    # prefix comparison rather than strict equality
    if found_version[0:len(branch_version)] == branch_version:
        log.info("{0} version '{1}' matches {2}"
                 .format(self.valid_git_branches_msg, branch_version, arg_version))
        return True
    log.error("{0} version '{1}' vs Dockerfile {2}"
              .format(self.invalid_git_branches_msg, branch_version, arg_version))
    self.dockerfiles_failed += 1
    self.branches_failed.add(branch)
    return False
def check_dockerfile_arg(self, filename, tag):
    """Check the Dockerfile's matching ARG line against the version embedded in the git tag.

    Returns True when the matching ARG's version prefix-matches the tag version (or no
    matching ARG line is found), False on a mismatch.
    """
    log.debug('check_dockerfile_arg({0}, {1})'.format(filename, tag))
    tag_base = str(tag).replace('-dev', '')
    (tag_base, tag_version) = tag_base.rsplit('-', 1)
    log.debug('tag_base = {0}'.format(tag_base))
    log.debug('tag_version = {0}'.format(tag_version))
    with open(filename) as filehandle:
        for line in filehandle:
            argversion = self.arg_regex.match(line.strip())
            if not argversion:
                continue
            log.debug("found arg '%s'", argversion.group(0))
            log.debug("checking arg group 1 '%s' == tag_base '%s'", argversion.group(1), tag_base)
            # ARG names use underscores where tag bases use dashes
            if argversion.group(1).lower() != tag_base.lower().replace('-', '_'):
                continue
            log.debug("arg '%s' matches tag base '%s'", argversion.group(1), tag_base)
            log.debug("comparing '%s' contents to version derived from tag '%s' => '%s'",
                      filename, tag, tag_version)
            if not isVersion(tag_version):
                die("unrecognized tag version '{0}' for tag_base '{1}'".format(tag_version, tag_base))
            found_version = argversion.group(2)
            # prefix comparison so tag version '1.2' accepts Dockerfile version '1.2.3'
            if found_version[0:len(tag_version)] == tag_version:
                log.info("{0} (tag version '{1}' matches arg version '{2}')"
                         .format(self.valid_git_tags_msg, tag_version, found_version))
                return True
            log.error('{0} ({1} tag vs {2} Dockerfile ARG)'
                      .format(self.invalid_git_tags_msg, tag_version, found_version))
            return False
    return True
def validate_csvreader(csvreader, filename):
    """Manually sanity-check a csv.reader's content, returning the (exhausted) reader or None.

    The csv module rarely raises and happily swallows whole lines of json/xml/yaml as
    a single field, hence the manual invalidations below.
    """
    count = 0
    try:
        for field_list in csvreader:  # list of fields, separator info already stripped
            log.debug("line: %s", field_list)
            # reject anything with fewer than 3 fields - catches one-field-per-line junk
            if len(field_list) < 3:
                log.error("less than 3 fields detected, aborting conversion of file '%s'", filename)
                return None
            # anti-json guard: first char of first field should be alphanumeric, not
            # syntax - quotes are allowed so quoted CSVs still pass validation
            if field_list[0] not in ("", " ") and not isChars(field_list[0][0], 'A-Za-z0-9"'):
                log.warning('non-alphanumeric / quote opening character detected in CSV first field' + \
                            '"{}"'.format(field_list[0]))
            count += 1
    except csv.Error as _:
        log.warning('file %s, line %s: %s', filename, csvreader.line_num, _)
        return None
    if count == 0:
        log.error('zero lines detected, blank input is not valid CSV')
        return None
    return csvreader
def check_git_branches_upstream(self, target):
    """Verify every (optionally prefix-filtered) branch of target's repo tracks origin/<branch>.

    Sets self.status to 'NO BRANCHES' / 'ERROR' on problems; logs OK/BAD per branch.
    """
    target = os.path.abspath(target)
    gitroot = find_git_root(target)
    if gitroot is None:
        die('Failed to find git root for target {0}'.format(target))
    log.debug("finding branches for target '{0}'".format(target))
    repo = git.Repo(gitroot)
    branches = repo.branches
    if self.branch_prefix is not None:
        log.debug('restricting to branches matching branch prefix')
        branches = [candidate for candidate in branches if self.branch_prefix.match(str(candidate))]
        if not branches:
            log.error("No branches matching '%s' for target '%s'", self.get_opt('branch_prefix'), target)
            self.status = 'NO BRANCHES'
    for branch in branches:
        expected = '{0}/{1}'.format(self.origin, branch)
        # str() needed - tracking_branch() returns a ref object that won't compare equal to a string
        tracking_branch = str(branch.tracking_branch())
        if tracking_branch == expected:
            log.info("OK: repo '{0}' branch '{1}' is tracking '{2}'"
                     .format(gitroot, branch, tracking_branch))
        else:
            self.status = "ERROR"
            log.error("BAD: branch '{0}' is tracking '{1}' (expected '{2}')"
                      .format(branch, tracking_branch, expected))
def detect_columns(self, csvreader):
    """Detect Date / Merchant Name / Amount / Balance column positions from the header row.

    Returns (positions, balance_position) where positions maps 'date'/'desc'/'amount'
    to column indices and balance_position may be None. Returns False when the header
    looks like JSON or a required column is missing (callers must check before unpacking).
    """
    # bug fix: csvreader.next() is Python-2-only; the next() builtin works on both 2 and 3
    headers = next(csvreader)
    if headers[0][0] == '{':
        log.error('JSON opening braces detected, not a CSV?')
        return False
    positions = {'date': None, 'desc': None, 'amount': None}
    balance_position = None
    for (position, value) in enumerate(headers):
        if 'Date' in value:
            positions['date'] = position
        elif 'Merchant Name' in value:
            positions['desc'] = position
        elif 'Amount' in value:
            positions['amount'] = position
        elif 'Balance' in value:
            balance_position = position
    for pos in positions:
        if positions[pos] is None:
            log.error('field %s not found', pos)
            return False
    if balance_position is None and self.running_balance is None:
        self.usage(
            'no balance column detected, please specify --starting-balance'
        )
    return (positions, balance_position)
def check_path(self, path):
    """Check a file or recurse a directory, recording duplicate-detection info per file.

    os.walk returns nothing if path is a file, so standalone file args go straight to
    check_file(); names, sizes, checksums and regex captures must be stored even for
    standalone file args.
    """
    if os.path.isfile(path):
        self.check_file(path)
    elif os.path.isdir(path):
        # os.walk is a generator; root is the dir, dirs and files are child basenames
        for root, dirs, files in os.walk(path):
            # do not check hidden subdirs
            if not self.include_dot_dirs:
                # checking os.path.basename(root)[0] raises IndexError on paths
                # suffixed with '/', so prune the dirs list in place instead
                dirs[:] = [d for d in dirs if d[0] != '.']
            for filebasename in files:
                filepath = os.path.join(root, filebasename)
                try:
                    self.is_file_dup(filepath)
                except OSError as exc:
                    log.error("error while checking file '{0}': {1}".format(filepath, exc))
                    self.failed = True
    else:
        # bug fix: the '%s' placeholder was never substituted - supply the path
        die("'%s' is not a file or directory" % path)
def run(self):
    """Validate every Dockerfile / directory arg against its git tags, exiting non-zero on failure."""
    if not self.args:
        self.usage('no Dockerfile / directory args given')
    args = uniq_list_ordered(self.args)
    self.tag_prefix = self.get_opt('tag_prefix')
    if self.tag_prefix is not None:
        validate_regex(self.tag_prefix, 'tag prefix')
        self.tag_prefix = re.compile(self.tag_prefix)
    # validate all paths up front before doing any work
    for arg in args:
        if not os.path.exists(arg):
            print("'%s' not found" % arg)
            sys.exit(ERRORS['WARNING'])
        if os.path.isfile(arg):
            log_option('file', arg)
        elif os.path.isdir(arg):
            log_option('directory', arg)
        else:
            die("path '%s' could not be determined as either a file or directory" % arg)
    for arg in args:
        self.check_git_tags_dockerfiles(arg)
    if self.failed:
        log.error('Dockerfile validation FAILED')
        sys.exit(ERRORS['CRITICAL'])
    log.info('Dockerfile validation SUCCEEDED')
def check_path(self, path):
    """Check a file or recurse a directory, recording duplicate-detection info per file.

    os.walk returns nothing if path is a file, so standalone file args go straight to
    check_file(); names, sizes, checksums and regex captures must be stored even for
    standalone file args.
    """
    if os.path.isfile(path):
        self.check_file(path)
    elif os.path.isdir(path):
        # os.walk is a generator; root is the dir, dirs and files are child basenames
        for root, dirs, files in os.walk(path):
            # do not check hidden subdirs
            if not self.include_dot_dirs:
                # checking os.path.basename(root)[0] raises IndexError on paths
                # suffixed with '/', so prune the dirs list in place instead
                dirs[:] = [d for d in dirs if d[0] != '.']
            for filebasename in files:
                filepath = os.path.join(root, filebasename)
                try:
                    self.is_file_dup(filepath)
                except OSError as exc:
                    log.error(
                        "error while checking file '{0}': {1}".format(
                            filepath, exc))
                    self.failed = True
    else:
        # bug fix: the '%s' placeholder was never substituted - supply the path
        die("'%s' is not a file or directory" % path)
def convert(self, filename, target_filename):
    """Convert one bank statement CSV into normalized Date/Description/Amount/Balance CSV.

    Returns True on success, False if the input could not be parsed as CSV.
    Exits(2) if no balance column exists and no running balance was given.
    """
    if self.reverse_order:
        filename = self.reverse_contents(filename)
    csvreader = self.get_csvreader(filename)
    if not csvreader:
        return False
    count = 0
    (positions, balance_position) = self.detect_columns(csvreader)
    # fix: use a context manager so the output file is always flushed and closed
    # (previously the handle from open() was never closed)
    with open(target_filename, 'w') as target_filehandle:
        csvwriter = csv.writer(target_filehandle)
        csvwriter.writerow(['Date', 'Description', 'Amount', 'Balance'])
        for row in csvreader:
            count += 1
            amount = self.amount(row[positions['amount']])
            if balance_position is not None:
                balance = row[balance_position]
            elif self.running_balance is not None:
                self.running_balance += amount
                balance = self.running_balance
            else:
                log.error('no balance column found and no running balance given')
                sys.exit(2)
            csvwriter.writerow(
                [
                    row[positions['date']],
                    row[positions['desc']],
                    amount,
                    balance
                ]
            )
    log.info('%s CSV lines processed', count)
    return True
def check_version(self, filename, branch, branch_base, arg_var, found_version, branch_version):
    """Validate that a Dockerfile's version agrees with the version derived from its branch.

    Does a prefix comparison (branch '1.2' accepts Dockerfile '1.2.3'); returns True
    on match, False on mismatch after recording the failure.
    """
    self.branches_dockerfile_checked.add(branch)
    self.dockerfiles_checked.add(filename)
    if arg_var:
        log.debug("found arg '%s'", arg_var)
        arg_version = "ARG '{0}={1}'".format(arg_var, found_version)
    else:
        arg_version = "'{0}'".format(found_version)
    log.debug("comparing '%s' contents to version derived from branch '%s' => '%s'",
              filename, branch, branch_version)
    # NOTE(review): lstrip() removes those *characters* rather than a literal prefix -
    # intent appears to be dropping a leading jdk/jre marker; confirm
    if not isVersion(branch_version.lstrip('jdk').lstrip('jre')):
        die("unrecognized branch version '{0}' for branch_base '{1}'"
            .format(branch_version, branch_base))
    if found_version[0:len(branch_version)] == branch_version:
        log.info("{0} version '{1}' matches {2}"
                 .format(self.valid_git_branches_msg, branch_version, arg_version))
        return True
    log.error("{0} version '{1}' vs Dockerfile {2}"
              .format(self.invalid_git_branches_msg, branch_version, arg_version))
    self.dockerfiles_failed += 1
    self.branches_failed.add(branch)
    return False
def detect_columns(self, csvreader):
    """Detect Date / Merchant Name / Amount / Balance column positions from the header row.

    Prefers Transaction Date over Posted Date and the native-currency Amount over
    Original Amount. Returns (positions, balance_position); returns False when the
    header looks like JSON or a required column is missing (callers must check
    before unpacking).
    """
    # bug fix: csvreader.next() is Python-2-only; the next() builtin works on both 2 and 3
    headers = next(csvreader)
    if headers[0][0] == '{':
        log.error('JSON opening braces detected, not a CSV?')
        return False
    positions = {'date': None, 'desc': None, 'amount': None}
    balance_position = None
    for (position, value) in enumerate(headers):
        # want Transaction Date and not Posted Date
        if 'Date' in value and not 'Posted' in value:
            positions['date'] = position
        elif 'Merchant Name' in value:
            positions['desc'] = position
        # Original Amount column will be original currency eg 499 USD, but we only want
        # native currency eg. 421.33
        elif 'Amount' in value and not 'Original' in value:
            positions['amount'] = position
        elif 'Balance' in value:
            balance_position = position
    for pos in positions:
        if positions[pos] is None:
            log.error('field %s not found', pos)
            return False
    if balance_position is None and self.running_balance is None:
        self.usage('no balance column detected, please specify --starting-balance')
    return (positions, balance_position)
def process_json(self, content, filename):
    """Validate content as JSON and print it compacted, retrying single-quoted variants.

    Returns True if valid JSON was printed; on failure marks self.failed, prints the
    offending content to stderr, and exits CRITICAL unless continue_on_error is set.
    """
    log.debug('process_json()')
    if not content:
        log.warning("blank content passed to process_json for contents of file '%s'", filename)
    if isJson(content):
        print(json.dumps(json.loads(content)))
        return True
    if self.permit_single_quotes:
        log.debug('checking for single quoted JSON')
        # first try regular single quoted JSON a la MongoDB
        if self.process_json_single_quoted(self.convert_single_quoted(content), filename):
            return True
        log.debug('single quoted JSON check failed, trying with pre-escaping double quotes')
        # then single quoted JSON containing unescaped double quotes: pre-escape them
        # before converting single quotes to doubles for processing
        if self.process_json_single_quoted(self.convert_single_quoted_escaped(content), filename):
            log.debug("processed single quoted json with non-escaped double quotes in '%s'", filename)
            return True
        log.debug('single quoted JSON check failed even with pre-escaping any double quotes')
    self.failed = True
    log.error("invalid json detected in '%s':", filename)
    printerr(content)
    if not self.continue_on_error:
        sys.exit(ERRORS['CRITICAL'])
    return False
def check_git_branches_upstream(self, target):
    """Check each (optionally prefix-filtered) branch of target's repo tracks origin/<branch>.

    Logs OK/BAD per branch and records problems in self.status.
    """
    target = os.path.abspath(target)
    gitroot = find_git_root(target)
    if gitroot is None:
        die('Failed to find git root for target {0}'.format(target))
    log.debug("finding branches for target '{0}'".format(target))
    repo = git.Repo(gitroot)
    branches = repo.branches
    if self.branch_prefix is not None:
        log.debug('restricting to branches matching branch prefix')
        branches = [candidate for candidate in branches if self.branch_prefix.match(str(candidate))]
        if not branches:
            log.error("No branches matching '%s' for target '%s'", self.get_opt('branch_prefix'), target)
            self.status = 'NO BRANCHES'
    for branch in branches:
        expected = '{0}/{1}'.format(self.origin, branch)
        # str() needed - tracking_branch() returns a ref object, not a string
        tracking_branch = str(branch.tracking_branch())
        if tracking_branch == expected:
            log.info(
                "OK: repo '{0}' branch '{1}' is tracking '{2}'".format(
                    gitroot, branch, tracking_branch))
        else:
            self.status = "ERROR"
            log.error(
                "BAD: branch '{0}' is tracking '{1}' (expected '{2}')".
                format(branch, tracking_branch, expected))
def run(self):
    """Validate Dockerfile ARG versions against git branch names, then print statistics.

    Exits CRITICAL when any Dockerfile fails validation.
    """
    if not self.args:
        self.usage('no Dockerfile / directory args given')
    args = uniq_list_ordered(self.args)
    self.branch_prefix = self.get_opt('branch_prefix')
    if self.branch_prefix is not None:
        validate_regex(self.branch_prefix, 'branch prefix')
        self.branch_prefix = re.compile(self.branch_prefix)
    # validate all paths up front before doing any work
    for arg in args:
        if not os.path.exists(arg):
            print("'%s' not found" % arg)
            sys.exit(ERRORS['WARNING'])
        if os.path.isfile(arg):
            log_option('file', arg)
        elif os.path.isdir(arg):
            log_option('directory', arg)
        else:
            die("path '%s' could not be determined as either a file or directory" % arg)
    for arg in args:
        self.check_git_branches_dockerfiles(arg)
    log.info('Total Branches: %s', len(self.branches))
    log.info('Selected Branches: %s', len(self.selected_branches))
    log.info('Branches checked: %s', self.branches_checked)
    log.info('Branches with Dockerfile checked: %s', len(self.branches_dockerfile_checked))
    branches_skipped = len(self.branches_skipped)
    if branches_skipped > 0:
        log.warn('{0} branches skipped for not matching expected naming format'
                 .format(branches_skipped))
    branches_not_checked = len(self.selected_branches) - len(self.branches_dockerfile_checked)
    # NOTE(review): threshold is > 1, not > 0 - presumably to tolerate a single
    # Dockerfile-less branch such as master; confirm intent
    if branches_not_checked > 1:
        log.warn('{0} branches not checked (no matching Dockerfile found?)'
                 .format(branches_not_checked))
        if log.isEnabledFor(logging.DEBUG):
            log.debug('Branches with no corresponding Dockerfile found:\n%s',
                      '\n'.join(set(self.selected_branches) -
                                set(self.branches_dockerfile_checked)))
    log.info('{0} Dockerfiles checked'.format(len(self.dockerfiles_checked)))
    branches_failed = len(self.branches_failed)
    summary = '{0} Dockerfiles failed validation across {1} branches'.format(
        self.dockerfiles_failed, branches_failed)
    if branches_failed > 0:
        log.error(summary)
    else:
        log.info(summary)
    if self.failed:
        log.error('Dockerfile validation FAILED')
        sys.exit(ERRORS['CRITICAL'])
    log.info('Dockerfile validation SUCCEEDED')
def run(self):
    """Check all path args for duplicates and report them.

    Exit codes: 4 if duplicates were found, 2 if errors occurred, 0 when clean.
    In --quiet mode prints only the sorted duplicate file paths.
    """
    args = self.process_args()
    self.check_args(args)
    for arg in args:
        try:
            self.check_path(arg)
        except OSError as _:
            log.error(_)
            self.failed = True
    if self.dups_by_name or \
       self.dups_by_size or \
       self.dups_by_hash or \
       self.dups_by_regex:
        if self.quiet:
            for _ in self.dups_by_name:
                self.dup_filepaths.add(_)
            # fix: .values() works on both Python 2 and 3 (.itervalues() is Py2-only
            # and breaks on Python 3)
            for _ in itertools.chain.from_iterable(self.dups_by_size.values()):
                self.dup_filepaths.add(_)
            for _ in itertools.chain.from_iterable(self.dups_by_hash.values()):
                self.dup_filepaths.add(_)
            for _ in itertools.chain.from_iterable(self.dups_by_regex.values()):
                self.dup_filepaths.add(_)
            for filepath in sorted(self.dup_filepaths):
                print(filepath)
            sys.exit(4)
        print('Duplicates detected!\n')
        if self.dups_by_name:
            print('Duplicates by name:\n')
            for basename in self.dups_by_name:
                print("--\nbasename '{0}':".format(basename))
                for filepath in sorted(self.dups_by_name[basename]):
                    print(filepath)
        if self.dups_by_size:
            print('Duplicates by size:\n')
            for size in self.dups_by_size:
                print("--\nsize '{0}' bytes:".format(size))
                for filepath in sorted(self.dups_by_size[size]):
                    print(filepath)
        if self.dups_by_hash:
            print('Duplicates by checksum:\n')
            for checksum in self.dups_by_hash:
                print("--\nchecksum '{0}':".format(checksum))
                for filepath in sorted(self.dups_by_hash[checksum]):
                    print(filepath)
        if self.dups_by_regex:
            print('Duplicates by regex match ({0}):\n'.format(self.regex))
            for matching_portion in self.dups_by_regex:
                print("--\nregex matching portion '{0}':".format(matching_portion))
                for filepath in sorted(self.dups_by_regex[matching_portion]):
                    print(filepath)
        sys.exit(4)
    elif self.failed:
        sys.exit(2)
    else:
        print('No Duplicates Found')
        sys.exit(0)
def run(self):
    """Convert each CSV file argument to '<name>_crunch.csv', logging the final balance."""
    if not self.args:
        self.usage('no file arguments specified')
    for filename in self.args:
        # strip any .csv suffix before appending _crunch.csv
        target_filename = '{}_crunch.csv'.format(re.sub(r'\.csv$', '', filename))
        log.info("converting file '%s' => '%s'", filename, target_filename)
        if self.convert(filename, target_filename):
            log.info("converted '%s' => '%s'", filename, target_filename)
        else:
            log.error("FAILED to convert filename '%s'", filename)
    log.info('Final Balance: {}'.format(self.running_balance))
def process_options(self):
    """Process command line options, validating --starting-balance as a Decimal."""
    super(CrunchAccountingCsvStatementConverter, self).process_options()
    self.credit_card = self.get_opt('credit_card')
    self.reverse_order = self.get_opt('reverse_order')
    self.running_balance = self.get_opt('starting_balance')
    if self.running_balance is not None:
        try:
            self.running_balance = Decimal(self.running_balance)
        # bug fix: Decimal() raises decimal.InvalidOperation (an ArithmeticError
        # subclass), not ValueError, on invalid input - the old handler never fired
        except (ValueError, ArithmeticError) as _:
            log.error('INVALID starting balance %s, must be in a decimal number: %s',
                      self.running_balance, _)
            sys.exit(1)
def execute(conn, database, table, query):
    """Run query against database.table on conn, printing one tab-separated line per result row."""
    try:
        log.info(' %s.%s - running %s', database, table, query)
        with conn.cursor() as query_cursor:
            # Impala dbapi doesn't support parameterized query quoting, so the
            # query string arrives pre-built
            query_cursor.execute(query)
            for result in query_cursor:
                print('{db}.{table}\t{result}'.format(db=database, table=table, \
                      result='\t'.join([str(_) for _ in result])))
    except impala.error.ProgrammingError as _:
        log.error(_)
        # COMPUTE STATS returns no results - swallow only that specific error
        if 'Trying to fetch results on an operation with no results' not in str(_):
            raise
def is_file_dup_by_regex(self, filepath):
    """Return True if filepath's regex capture was already seen for another file.

    On first sighting, records the capture -> filepath mapping; on a repeat sighting,
    records both file paths under dups_by_regex[capture].
    """
    match = re.search(self.regex, filepath)
    if not match:
        return False
    log.debug("regex matched file '%s'", filepath)
    if not match.groups():
        log.error('no capture detected! Did you forget to specify the (brackets) to capture in the regex?')
        return False
    capture = match.group(1)
    if capture in self.regex_captures:
        # duplicate: keep both the originally seen path and this one
        self.dups_by_regex.setdefault(capture, set())
        self.dups_by_regex[capture].add(self.regex_captures[capture])
        self.dups_by_regex[capture].add(filepath)
        return True
    self.regex_captures[capture] = filepath
    return False
def check_git_branches_upstream(self, target):
    """Check each branch tracks origin/<branch>, optionally repairing tracking config.

    --fix sets tracking only on branches with no tracking configured ('None');
    --force-fix resets tracking unconditionally. Otherwise mismatches set
    self.status to ERROR.
    """
    target = os.path.abspath(target)
    gitroot = find_git_root(target)
    if gitroot is None:
        die('Failed to find git root for target {0}'.format(target))
    log.debug("finding branches for target '{0}'".format(target))
    repo = git.Repo(gitroot)
    branches = repo.branches
    if self.branch_prefix is not None:
        log.debug('restricting to branches matching branch prefix')
        branches = [candidate for candidate in branches if self.branch_prefix.match(str(candidate))]
        if not branches:
            log.error("No branches matching '%s' for target '%s'", self.get_opt('branch_prefix'), target)
            self.status = 'NO BRANCHES'
    for branch in branches:
        expected = '{0}/{1}'.format(self.origin, branch)
        # have to str() this as it returns an object that will fail equality match otherwise
        tracking_branch = str(branch.tracking_branch())
        if tracking_branch == expected:
            log.info("OK: repo '{0}' branch '{1}' is tracking '{2}'"
                     .format(gitroot, branch, tracking_branch))
        elif self.get_opt('fix') and tracking_branch == 'None':
            log.warn("WARN: setting repo '{0}' unconfigured branch '{1}' to track '{2}'"
                     .format(gitroot, branch, expected))
            branch.set_tracking_branch(
                git.refs.remote.RemoteReference(repo, 'refs/remotes/' + expected))
        elif self.get_opt('force_fix'):
            log.warn("WARN: forcibly resetting repo '{0}' branch '{1}' to track '{2}'"
                     .format(gitroot, branch, expected))
            branch.set_tracking_branch(
                git.refs.remote.RemoteReference(repo, 'refs/remotes/' + expected))
        else:
            self.status = "ERROR"
            log.error("BAD: branch '{0}' is tracking '{1}' (expected '{2}')"
                      .format(branch, tracking_branch, expected))
def process_database(self, database, table_regex):
    """Run self.query against every table in database whose name matches table_regex.

    Skips the database on AuthorizationException; honours --ignore-errors per table.
    """
    tables = []
    table_count = 0
    log.info("querying tables for database '%s'", database)
    conn = self.connect(database)
    with conn.cursor() as table_cursor:
        try:
            # doesn't support parameterized query quoting from dbapi spec
            table_cursor.execute('use `{}`'.format(database))
            table_cursor.execute('show tables')
        except impala.error.HiveServer2Error as _:
            log.error("error querying tables for database '%s': %s", database, _)
            if 'AuthorizationException' in str(_):
                return
            raise
        for table_row in table_cursor:
            table = table_row[0]
            table_count += 1
            if not table_regex.search(table):
                log.debug("skipping database '%s' table '%s', does not match regex '%s'", \
                          database, table, self.table)
                continue
            tables.append(table)
    log.info("%s/%s tables selected for database '%s'", len(tables), table_count, database)
    for table in tables:
        try:
            query = self.query.format(db='`{}`'.format(database), table='`{}`'.format(table))
        except KeyError as _:
            # bug fix: _ is a KeyError instance, so the old comparison `_ == 'db'`
            # was always False and the fallback format never ran - compare the
            # missing key carried in the exception args instead
            if _.args and _.args[0] == 'db':
                query = self.query.format(table='`{}`'.format(table))
            else:
                raise
        try:
            self.execute(conn, database, table, query)
            self.table_count += 1
        except Exception as _:
            if self.ignore_errors:
                log.error("database '%s' table '%s': %s", database, table, _)
                continue
            raise
def run(self):
    """Validate Dockerfile ARG versions against git branch names and report statistics,
    exiting CRITICAL on any validation failure."""
    if not self.args:
        self.usage('no Dockerfile / directory args given')
    args = uniq_list_ordered(self.args)
    self.branch_prefix = self.get_opt('branch_prefix')
    if self.branch_prefix is not None:
        validate_regex(self.branch_prefix, 'branch prefix')
        self.branch_prefix = re.compile(self.branch_prefix)
    # check all paths exist before starting any validation work
    for arg in args:
        if not os.path.exists(arg):
            print("'%s' not found" % arg)
            sys.exit(ERRORS['WARNING'])
        if os.path.isfile(arg):
            log_option('file', arg)
        elif os.path.isdir(arg):
            log_option('directory', arg)
        else:
            die("path '%s' could not be determined as either a file or directory" % arg)
    for arg in args:
        self.check_git_branches_dockerfiles(arg)
    log.info('Total Branches: %s', len(self.branches))
    log.info('Selected Branches: %s', len(self.selected_branches))
    log.info('Branches checked: %s', self.branches_checked)
    log.info('Branches with Dockerfile checked: %s', len(self.branches_dockerfile_checked))
    branches_skipped = len(self.branches_skipped)
    if branches_skipped > 0:
        log.warn('{0} branches skipped for not matching expected naming format'
                 .format(branches_skipped))
    branches_not_checked = len(self.selected_branches) - len(self.branches_dockerfile_checked)
    # NOTE(review): threshold is > 1, not > 0 - presumably tolerating one
    # Dockerfile-less branch such as master; confirm intent
    if branches_not_checked > 1:
        log.warn('{0} branches not checked (no matching Dockerfile found?)'.format(branches_not_checked))
        if log.isEnabledFor(logging.DEBUG):
            log.debug('Branches with no corresponding Dockerfile found:\n%s',
                      '\n'.join(set(self.selected_branches) - set(self.branches_dockerfile_checked)))
    log.info('{0} Dockerfiles checked'.format(len(self.dockerfiles_checked)))
    branches_failed = len(self.branches_failed)
    summary = '{0} Dockerfiles failed validation across {1} branches'.format(
        self.dockerfiles_failed, branches_failed)
    if branches_failed > 0:
        log.error(summary)
    else:
        log.info(summary)
    if self.failed:
        log.error('Dockerfile validation FAILED')
        sys.exit(ERRORS['CRITICAL'])
    log.info('Dockerfile validation SUCCEEDED')
def check_dockerfile_arg(self, filename, branch):
    """Check successive ARG *_VERSION lines in a Dockerfile against the version numbers
    embedded in the branch name, in order.

    Returns False on the first mismatch (recording the failure), True otherwise.
    """
    log.debug('check_dockerfile_arg({0}, {1})'.format(filename, branch))
    branch_base = str(branch).replace('-dev', '')
    (branch_base, branch_versions) = self.branch_version(branch)
    with open(filename) as filehandle:
        version_index = 0
        for line in filehandle:
            argversion = self.arg_regex.match(line.strip())
            if not argversion:
                continue
            self.dockerfiles_checked.add(filename)
            log.debug("found arg '%s'", argversion.group(0))
            arg_var = argversion.group(1)
            # matching the ARG name against the branch base proved too restrictive for
            # more complex naming conventions (kafka, centos-java/scala etc) - instead
            # ARG *_VERSION lines are expected in the same order as the version numbers
            # in the branch name
            if version_index >= len(branch_versions):
                return True
            branch_version = branch_versions[version_index]
            log.debug(
                "comparing '%s' contents to version derived from branch '%s' => '%s'",
                filename, branch, branch_version)
            if not isVersion(branch_version):
                die("unrecognized branch version '{0}' for branch_base '{1}'"
                    .format(branch_version, branch_base))
            found_version = argversion.group(2)
            # prefix comparison so branch version '1.2' accepts Dockerfile version '1.2.3'
            if found_version[0:len(branch_version)] != branch_version:
                log.error(
                    '{0} version {1} vs Dockerfile ARG {2}={3}'.format(
                        self.invalid_git_branches_msg, branch_version, arg_var,
                        found_version))
                self.dockerfiles_failed += 1
                self.branches_failed.add(branch)
                return False
            log.info("{0} version '{1}' matches {2}={3}".format(
                self.valid_git_branches_msg, branch_version, arg_var,
                found_version))
            version_index += 1
    return True
def get_csvreader(filename):
    """Open filename and return a validated csv.reader rewound to the start of the file,
    or None if the file can't be opened or doesn't look like valid CSV."""
    try:
        filehandle = open(filename)
    except IOError as _:
        log.error(_)
        return None
    filename = os.path.basename(filename)
    try:
        # Sniffer raises csv.Error when it can't detect a dialect
        dialect = csv.Sniffer().sniff(filehandle.read(1024))
        dialect.strict = True
        filehandle.seek(0)
        csvreader = csv.reader(filehandle, dialect)
    except csv.Error as _:
        log.warning('file %s: %s', filename, _)
        return None
    csvreader = CrunchAccountingCsvStatementConverter.validate_csvreader(csvreader, filename)
    # validation consumed the reader - rewind so callers start from the first line
    filehandle.seek(0)
    return csvreader
def check_dockerfile_arg(self, filename, tag):
    """Check the Dockerfile ARG matching this tag's base against the tag's version.

    Returns True when the matching ARG version prefix-matches the tag version (or no
    matching ARG line exists), False on mismatch.
    """
    log.debug("check_dockerfile_arg({0}, {1})".format(filename, tag))
    tag_base = str(tag).replace("-dev", "")
    (tag_base, tag_version) = tag_base.rsplit("-", 1)
    log.debug("tag_base = {0}".format(tag_base))
    log.debug("tag_version = {0}".format(tag_version))
    with open(filename) as filehandle:
        for line in filehandle:
            argversion = self.arg_regex.match(line.strip())
            if not argversion:
                continue
            log.debug("found arg '%s'", argversion.group(0))
            log.debug("checking arg group 1 '%s' == tag_base '%s'", argversion.group(1), tag_base)
            # ARG names use underscores where tag bases use dashes
            if argversion.group(1).lower() != tag_base.lower().replace("-", "_"):
                continue
            log.debug("arg '%s' matches tag base '%s'", argversion.group(1), tag_base)
            log.debug(
                "comparing '%s' contents to version derived from tag '%s' => '%s'",
                filename,
                tag,
                tag_version,
            )
            if not isVersion(tag_version):
                die("unrecognized tag version '{0}' for tag_base '{1}'".format(tag_version, tag_base))
            found_version = argversion.group(2)
            # prefix comparison so tag version '1.2' accepts Dockerfile version '1.2.3'
            if found_version[0 : len(tag_version)] == tag_version:
                log.info(
                    "{0} (tag version '{1}' matches arg version '{2}')".format(
                        self.valid_git_tags_msg, tag_version, found_version
                    )
                )
                return True
            log.error(
                "{0} ({1} tag vs {2} Dockerfile ARG)".format(
                    self.invalid_git_tags_msg, tag_version, found_version
                )
            )
            return False
    return True
def check_dockerfile_arg(self, filename, branch):
    """Validate the Dockerfile's ordered ARG *_VERSION lines against the ordered version
    numbers embedded in the branch name.

    Returns False on the first mismatch (after recording it), True otherwise.
    """
    log.debug('check_dockerfile_arg({0}, {1})'.format(filename, branch))
    branch_base = str(branch).replace('-dev', '')
    (branch_base, branch_versions) = self.branch_version(branch)
    with open(filename) as filehandle:
        version_index = 0
        for line in filehandle:
            argversion = self.arg_regex.match(line.strip())
            if not argversion:
                continue
            self.dockerfiles_checked.add(filename)
            log.debug("found arg '%s'", argversion.group(0))
            arg_var = argversion.group(1)
            # matching ARG names against the branch base was too restrictive for more
            # complex naming conventions (kafka, centos-java/scala etc); ARG *_VERSION
            # lines are instead expected in the same order as the branch's version numbers
            if version_index >= len(branch_versions):
                return True
            branch_version = branch_versions[version_index]
            log.debug("comparing '%s' contents to version derived from branch '%s' => '%s'",
                      filename, branch, branch_version)
            if not isVersion(branch_version):
                die("unrecognized branch version '{0}' for branch_base '{1}'"
                    .format(branch_version, branch_base))
            found_version = argversion.group(2)
            # prefix comparison so branch version '1.2' accepts Dockerfile version '1.2.3'
            if found_version[0:len(branch_version)] != branch_version:
                log.error('{0} version {1} vs Dockerfile ARG {2}={3}'.
                          format(self.invalid_git_branches_msg, branch_version, arg_var, found_version))
                self.dockerfiles_failed += 1
                self.branches_failed.add(branch)
                return False
            log.info("{0} version '{1}' matches {2}={3}".
                     format(self.valid_git_branches_msg, branch_version, arg_var, found_version))
            version_index += 1
    return True
def process_database(self, database):
    """Process every table in the given database, honouring --ignore-errors per table."""
    log.info("querying tables for database '%s'", database)
    tables = []
    with self.conn.cursor() as table_cursor:
        # doesn't support parameterized query quoting from dbapi spec
        table_cursor.execute('use `{}`'.format(database))
        table_cursor.execute('show tables')
        for table_row in table_cursor:
            table = table_row[0]
            self.table_count += 1
            tables.append(table)
    log.info("found %s tables in database '%s'", len(tables), database)
    for table in tables:
        try:
            self.process_table(database, table)
        except impala.error.HiveServer2Error as _:
            if self.ignore_errors:
                log.error("database '%s' table '%s': %s", database, table, _)
                continue
            raise
def run(self):
    """Validate Dockerfiles against git branch versions, then summarize and exit
    CRITICAL on any validation failure."""
    if not self.args:
        self.usage('no Dockerfile / directory args given')
    args = uniq_list_ordered(self.args)
    self.branch_prefix = self.get_opt('branch_prefix')
    if self.branch_prefix is not None:
        validate_regex(self.branch_prefix, 'branch prefix')
        self.branch_prefix = re.compile(self.branch_prefix)
    # verify all args exist before doing any validation work
    for arg in args:
        if not os.path.exists(arg):
            print("'%s' not found" % arg)
            sys.exit(ERRORS['WARNING'])
        if os.path.isfile(arg):
            log_option('file', arg)
        elif os.path.isdir(arg):
            log_option('directory', arg)
        else:
            die("path '%s' could not be determined as either a file or directory" % arg)
    for arg in args:
        self.check_git_branches_dockerfiles(arg)
    branches_skipped = len(self.branches_skipped)
    if branches_skipped > 0:
        log.warn('{0} branches skipped for not matching expected naming format'
                 .format(branches_skipped))
    log.info('{0} Dockerfiles checked across {1} branches'
             .format(len(self.dockerfiles_checked), self.branches_checked))
    branches_failed = len(self.branches_failed)
    summary = '{0} Dockerfiles failed validation across {1} branches'.format(
        self.dockerfiles_failed, branches_failed)
    if branches_failed > 0:
        log.error(summary)
    else:
        log.info(summary)
    if self.failed:
        log.error('Dockerfile validation FAILED')
        sys.exit(ERRORS['CRITICAL'])
    log.info('Dockerfile validation SUCCEEDED')
def process_database(self, conn, database, table_regex, partition_regex):
    """Enumerate tables in `database`, filter by table_regex, and row-count each.

    Switches the session to `database`, lists its tables, skips those not
    matching the compiled `table_regex`, then calls get_row_counts() for each
    selected table. AuthorizationException on the database is logged and the
    database skipped; other HiveServer2 errors are re-raised. Per-table errors
    are logged and skipped when self.ignore_errors is set, otherwise re-raised.
    """
    tables = []
    table_count = 0
    log.info("querying tables for database '%s'", database)
    with conn.cursor() as table_cursor:
        try:
            # doesn't support parameterized query quoting from dbapi spec
            #table_cursor.execute('use %(database)s', {'database': database})
            table_cursor.execute('use `{}`'.format(database))
            table_cursor.execute('show tables')
        except impala.error.HiveServer2Error as _:
            log.error(_)
            if 'AuthorizationException' in str(_):
                return
            raise
        for table_row in table_cursor:
            table = table_row[0]
            table_count += 1
            if not table_regex.search(table):
                # log the pattern actually applied - the original logged
                # self.table, which need not match the compiled regex passed in
                log.debug("skipping database '%s' table '%s', does not match regex '%s'",
                          database, table, table_regex.pattern)
                continue
            tables.append(table)
    log.info("%s/%s tables selected for database '%s'", len(tables), table_count, database)
    for table in tables:
        try:
            self.get_row_counts(conn, database, table, partition_regex)
            self.table_count += 1
        except Exception as _:
            # invalid query handle and similar errors happen at higher level
            # as they are not query specific, will not be caught here so still error out
            if self.ignore_errors:
                log.error("database '%s' table '%s': %s", database, table, _)
                continue
            raise
def run(self):
    """Validate Dockerfile ARG versions against git tags.

    Checks each file/directory argument exists and is usable, then validates
    Dockerfiles across git tags (optionally restricted by --tag-prefix),
    exiting CRITICAL on any validation failure.
    """
    if not self.args:
        self.usage("no Dockerfile / directory args given")
    paths = uniq_list_ordered(self.args)
    self.tag_prefix = self.get_opt("tag_prefix")
    if self.tag_prefix is not None:
        validate_regex(self.tag_prefix, "tag prefix")
        self.tag_prefix = re.compile(self.tag_prefix)
    # validate every path up front before any tag checking starts
    for path in paths:
        if not os.path.exists(path):
            print("'%s' not found" % path)
            sys.exit(ERRORS["WARNING"])
        if os.path.isfile(path):
            log_option("file", path)
        elif os.path.isdir(path):
            log_option("directory", path)
        else:
            die("path '%s' could not be determined as either a file or directory" % path)
    for path in paths:
        self.check_git_tags_dockerfiles(path)
    if not self.failed:
        log.info("Dockerfile validation SUCCEEDED")
        return
    log.error("Dockerfile validation FAILED")
    sys.exit(ERRORS["CRITICAL"])
def run(self):
    """Validate Dockerfile ARG versions against git branch names.

    Validates each file/directory argument, checks Dockerfiles across all git
    branches (optionally restricted by --branch-prefix), logs a summary of
    skipped/failed branches, and exits CRITICAL on any validation failure.
    """
    if not self.args:
        self.usage('no Dockerfile / directory args given')
    args = uniq_list_ordered(self.args)
    self.branch_prefix = self.get_opt('branch_prefix')
    if self.branch_prefix is not None:
        validate_regex(self.branch_prefix, 'branch prefix')
        self.branch_prefix = re.compile(self.branch_prefix)
    # fail fast on bad paths before doing any branch checking
    for arg in args:
        if not os.path.exists(arg):
            print("'%s' not found" % arg)
            sys.exit(ERRORS['WARNING'])
        if os.path.isfile(arg):
            log_option('file', arg)
        elif os.path.isdir(arg):
            log_option('directory', arg)
        else:
            die("path '%s' could not be determined as either a file or directory" % arg)
    for arg in args:
        self.check_git_branches_dockerfiles(arg)
    branches_skipped = len(self.branches_skipped)
    if branches_skipped > 0:
        # log.warn is a deprecated alias - use log.warning
        log.warning('{0} branches skipped for not matching expected naming format'
                    .format(branches_skipped))
    log.info('{0} Dockerfiles checked across {1} branches'
             .format(len(self.dockerfiles_checked), self.branches_checked))
    branches_failed = len(self.branches_failed)
    # give the summary a real name - '_' suggests the value is unused, but it is logged below
    summary = '{0} Dockerfiles failed validation across {1} branches'.format(
        self.dockerfiles_failed, branches_failed)
    if branches_failed > 0:
        log.error(summary)
    else:
        log.info(summary)
    if self.failed:
        log.error('Dockerfile validation FAILED')
        sys.exit(ERRORS['CRITICAL'])
    log.info('Dockerfile validation SUCCEEDED')
def check_git_branches_upstream(self, target):
    """Check every local branch of target's git repo tracks '<origin>/<branch>'.

    With --fix, configures tracking for branches that have none; with
    --force-fix, forcibly resets tracking for every branch. A branch tracking
    the wrong remote ref sets self.status to ERROR.
    """
    target = os.path.abspath(target)
    gitroot = find_git_root(target)
    if gitroot is None:
        die('Failed to find git root for target {0}'.format(target))
    log.debug("finding branches for target '{0}'".format(target))
    repo = git.Repo(gitroot)
    branches = repo.branches
    if self.branch_prefix is not None:
        log.debug('restricting to branches matching branch prefix')
        branches = [x for x in branches if self.branch_prefix.match(str(x))]
        # NOTE(review): empty-match check inferred as nested under the prefix
        # filter since the message reports the branch_prefix option - confirm
        if not branches:
            log.error("No branches matching '%s' for target '%s'",
                      self.get_opt('branch_prefix'), target)
            self.status = 'NO BRANCHES'
    #if log.isEnabledFor(logging.DEBUG):
    #log.debug('\n\nbranches for target %s:\n\n%s\n', target, '\n'.join(list(branches)))
    for branch in branches:
        expected = '{0}/{1}'.format(self.origin, branch)
        # have to str() this as it returns an object that will fail equality match otherwise
        tracking_branch = str(branch.tracking_branch())
        if tracking_branch == expected:
            log.info("OK: repo '{0}' branch '{1}' is tracking '{2}'"
                     .format(gitroot, branch, tracking_branch))
        elif self.get_opt('fix') and tracking_branch == 'None':
            # log.warn is a deprecated alias - use log.warning
            log.warning("WARN: setting repo '{0}' unconfigured branch '{1}' to track '{2}'"
                        .format(gitroot, branch, expected))
            #print(list(repo.remotes.origin.refs))
            branch.set_tracking_branch(
                git.refs.remote.RemoteReference(repo, 'refs/remotes/' + expected))
        elif self.get_opt('force_fix'):
            log.warning("WARN: forcibly resetting repo '{0}' branch '{1}' to track '{2}'"
                        .format(gitroot, branch, expected))
            branch.set_tracking_branch(
                git.refs.remote.RemoteReference(repo, 'refs/remotes/' + expected))
        else:
            self.status = "ERROR"
            log.error("BAD: branch '{0}' is tracking '{1}' (expected '{2}')"
                      .format(branch, tracking_branch, expected))