Example #1
0
 def check_version(self, filename, branch, branch_base, arg_var, found_version, branch_version):
     """Compare a Dockerfile's discovered version against the version derived
     from its git branch name.

     Records the branch and filename as checked, validates the branch version
     token, then does a prefix comparison.  Returns True on match; on
     mismatch records the failure and returns False.
     """
     self.branches_dockerfile_checked.add(branch)
     self.dockerfiles_checked.add(filename)
     arg_version = "'{0}'".format(found_version)
     if arg_var:
         log.debug("found arg '%s'", arg_var)
         arg_version = "ARG '{0}={1}'".format(arg_var, found_version)
     log.debug("comparing '%s' contents to version derived from branch '%s' => '%s'",
               filename, branch, branch_version)
     # strip any jdk/jre prefix before validating the version token
     if not isVersion(branch_version.lstrip('jdk').lstrip('jre')):
         die("unrecognized branch version '{0}' for branch_base '{1}'"
             .format(branch_version, branch_base))
     # prefix match so branch version '1.2' accepts Dockerfile version '1.2.3'
     matched = found_version[0:len(branch_version)] == branch_version
     if not matched:
         log.error("{0} version '{1}' vs Dockerfile {2}".
                   format(self.invalid_git_branches_msg, branch_version, arg_version))
         self.dockerfiles_failed += 1
         self.branches_failed.add(branch)
         return False
     log.info("{0} version '{1}' matches {2}".
              format(self.valid_git_branches_msg, branch_version, arg_version))
     return True
 def check_dockerfile_arg(self, filename, tag):
     """Check a Dockerfile's ARG lines against the version embedded in a git tag.

     The tag is expected in the form '<base>-<version>' (an optional '-dev'
     suffix is ignored).  Scans the Dockerfile for an ARG whose name matches
     the tag base and compares versions by prefix, so a tag version '1.2'
     matches an ARG version '1.2.3'.  Returns True on match or when no
     matching ARG is found, False on a version mismatch.
     """
     log.debug('check_dockerfile_arg({0}, {1})'.format(filename, tag))
     # '-dev' tags are validated against the same version as their base tag
     tag_base = str(tag).replace('-dev', '')
     (tag_base, tag_version) = tag_base.rsplit('-', 1)
     log.debug('tag_base = {0}'.format(tag_base))
     log.debug('tag_version = {0}'.format(tag_version))
     with open(filename) as filehandle:
         for line in filehandle:
             argversion = self.arg_regex.match(line.strip())
             if argversion:
                 log.debug("found arg '%s'", argversion.group(0))
                 log.debug("checking arg group 1 '%s' == tag_base '%s'", argversion.group(1), tag_base)
                 # ARG names use underscores where tag bases use dashes
                 if argversion.group(1).lower() == tag_base.lower().replace('-', '_'):
                     log.debug("arg '%s'  matches tag base '%s'", argversion.group(1), tag_base)
                     log.debug("comparing '%s' contents to version derived from tag '%s' => '%s'",
                               filename, tag, tag_version)
                     if not isVersion(tag_version):
                         die("unrecognized tag version '{0}' for tag_base '{1}'".format(tag_version, tag_base))
                     found_version = argversion.group(2)
                     # prefix match: tag version '1.2' accepts ARG version '1.2.3'
                     if found_version[0:len(tag_version)] == tag_version:
                         log.info("{0} (tag version '{1}' matches arg version '{2}')".
                                  format(self.valid_git_tags_msg, tag_version, found_version))
                         return True
                     else:
                         log.error('{0} ({1} tag vs {2} Dockerfile ARG)'.
                                   format(self.invalid_git_tags_msg, tag_version, found_version))
                         return False
     return True
 def validate_csvreader(csvreader, filename):
     """Sanity-check that the csvreader's content really looks like CSV.

     Returns the csvreader on success, None if the content fails validation.
     NOTE(review): defined without 'self' despite sitting among methods —
     presumably used as a static helper; confirm how it is called.
     """
     count = 0
     try:
         # csvreader doesn't seem to generate any errors ever :-(
         # csv module allows entire lines of json/xml/yaml to go in as a single field
         # Adding some invalidations manually
         for field_list in csvreader:
             # list of fields with no separator information
             log.debug("line: %s", field_list)
             # make it fail if there is only a single field on any line
             if len(field_list) < 3:
                 log.error("less than 3 fields detected, aborting conversion of file '%s'", filename)
                 return None
             # extra protection along the same lines as anti-json:
             # the first char of field should be alphanumeric, not syntax
             # however instead of isAlnum allow quotes for quoted CSVs to pass validation
             if field_list[0] not in ("", " ") and not isChars(field_list[0][0], 'A-Za-z0-9"'):
                 log.warning('non-alphanumeric / quote opening character detected in CSV first field' + \
                             '"{}"'.format(field_list[0]))
                 # warn-only: deliberately does not reject the file here
                 #return None
             count += 1
     except csv.Error as _:
         log.warning('file %s, line %s: %s', filename, csvreader.line_num, _)
         return None
     if count == 0:
         log.error('zero lines detected, blank input is not valid CSV')
         return None
     return csvreader
 def check_git_branches_upstream(self, target):
     """Verify every branch of target's git repo tracks '<origin>/<branch>'.

     Branches can be restricted with --branch-prefix.  Sets self.status to
     'NO BRANCHES' or 'ERROR' on problems; returns nothing.
     """
     target = os.path.abspath(target)
     gitroot = find_git_root(target)
     if gitroot is None:
         die('Failed to find git root for target {0}'.format(target))
     log.debug("finding branches for target '{0}'".format(target))
     repo = git.Repo(gitroot)
     branches = repo.branches
     if self.branch_prefix is not None:
         log.debug('restricting to branches matching branch prefix')
         branches = [x for x in branches if self.branch_prefix.match(str(x))]
         if not branches:
             log.error("No branches matching '%s' for target '%s'", self.get_opt('branch_prefix'), target)
             self.status = 'NO BRANCHES'
     for branch in branches:
         # each local branch is expected to track the same-named branch on origin
         expected = '{0}/{1}'.format(self.origin, branch)
         # str() the tracking-branch object so it compares against the expected string
         tracking_branch = str(branch.tracking_branch())
         if tracking_branch == expected:
             log.info("OK: repo '{0}' branch '{1}' is tracking '{2}'"
                      .format(gitroot, branch, tracking_branch))
         else:
             self.status = "ERROR"
             log.error("BAD: branch '{0}' is tracking '{1}' (expected '{2}')"
                       .format(branch, tracking_branch, expected))
 def detect_columns(self, csvreader):
     """Detect the date / description / amount / balance column positions
     from the header row of the CSV.

     Returns (positions, balance_position) on success, where positions maps
     'date'/'desc'/'amount' to column indexes and balance_position may be
     None when no balance column exists.  Returns False if the content does
     not look like a CSV or a required column is missing.
     """
     # bug fix: csvreader.next() is Python-2-only; the next() builtin works
     # on both Python 2 (2.6+) and Python 3, where .next() became __next__()
     headers = next(csvreader)
     if headers[0][0] == '{':
         log.error('JSON opening braces detected, not a CSV?')
         return False
     positions = {'date': None, 'desc': None, 'amount': None}
     balance_position = None
     for (position, value) in enumerate(headers):
         if 'Date' in value:
             positions['date'] = position
         elif 'Merchant Name' in value:
             positions['desc'] = position
         elif 'Amount' in value:
             positions['amount'] = position
         elif 'Balance' in value:
             balance_position = position
     for pos in positions:
         if positions[pos] is None:
             log.error('field %s not found', pos)
             return False
     # no balance column means we need a starting balance to derive one
     if balance_position is None and self.running_balance is None:
         self.usage(
             'no balance column detected, please specify --starting-balance'
         )
     return (positions, balance_position)
 def check_path(self, path):
     """Check a single file, or recursively check every file under a directory.

     Hidden subdirectories are pruned unless self.include_dot_dirs is set.
     OS errors on individual files are logged and mark the run failed
     rather than aborting the whole walk.
     """
     # os.walk returns nothing if path is a file, so files are handled
     # directly; names, sizes, checksums and regex captures must be stored
     # even for standalone file args
     if os.path.isfile(path):
         self.check_file(path)
     elif os.path.isdir(path):
         # os.walk returns a generator; root is the dir, dirs and files are child basenames
         for root, dirs, files in os.walk(path):
             # do not check hidden subdirs
             if not self.include_dot_dirs:
                 # modify the dirs list in place to prune hidden dirs from
                 # the walk (cheaper than regex-stripping suffixed '/')
                 dirs[:] = [d for d in dirs if d[0] != '.']
             for filebasename in files:
                 filepath = os.path.join(root, filebasename)
                 try:
                     self.is_file_dup(filepath)
                 except OSError as exc:
                     log.error("error while checking file '{0}': {1}".format(filepath, exc))
                     self.failed = True
     else:
         # bug fix: the format string previously had no '% path' argument,
         # so the '%s' placeholder was printed literally
         die("'%s' is not a file or directory" % path)
 def run(self):
     """Entry point: validate Dockerfiles against git tag versions.

     Validates that all args exist as files/directories, compiles the
     optional --tag-prefix regex, then checks each arg.  Exits WARNING for
     missing paths and CRITICAL if any Dockerfile failed validation.
     """
     if not self.args:
         self.usage('no Dockerfile / directory args given')
     args = uniq_list_ordered(self.args)
     self.tag_prefix = self.get_opt('tag_prefix')
     if self.tag_prefix is not None:
         validate_regex(self.tag_prefix, 'tag prefix')
         self.tag_prefix = re.compile(self.tag_prefix)
     # validate all args up front before doing any real work
     for arg in args:
         if not os.path.exists(arg):
             print("'%s' not found" % arg)
             sys.exit(ERRORS['WARNING'])
         if os.path.isfile(arg):
             log_option('file', arg)
         elif os.path.isdir(arg):
             log_option('directory', arg)
         else:
             die("path '%s' could not be determined as either a file or directory"
                 % arg)
     for arg in args:
         self.check_git_tags_dockerfiles(arg)
     if self.failed:
         log.error('Dockerfile validation FAILED')
         sys.exit(ERRORS['CRITICAL'])
     log.info('Dockerfile validation SUCCEEDED')
Example #8
0
 def check_path(self, path):
     """Check a file directly, or walk a directory checking every file in it.

     Hidden subdirectories are skipped unless self.include_dot_dirs is set.
     Per-file OS errors are logged and flag the run as failed instead of
     aborting the walk.
     """
     # os.walk returns nothing if path is a file, so handle files directly;
     # file names, sizes, checksums and regex captures must be stored even
     # for standalone file args
     if os.path.isfile(path):
         self.check_file(path)
     elif os.path.isdir(path):
         # os.walk returns a generator; root is the dir, dirs and files are child basenames
         for root, dirs, files in os.walk(path):
             # do not check hidden subdirs
             if not self.include_dot_dirs:
                 # prune hidden dirs in place so os.walk never descends into them
                 dirs[:] = [d for d in dirs if d[0] != '.']
             for filebasename in files:
                 filepath = os.path.join(root, filebasename)
                 try:
                     self.is_file_dup(filepath)
                 except OSError as exc:
                     log.error(
                         "error while checking file '{0}': {1}".format(
                             filepath, exc))
                     self.failed = True
     else:
         # bug fix: supply the missing '% path' argument so the message
         # shows the offending path instead of a literal '%s'
         die("'%s' is not a file or directory" % path)
Example #9
0
 def convert(self, filename, target_filename):
     """Convert a bank-statement CSV into a normalized
     Date/Description/Amount/Balance CSV at target_filename.

     Returns True on success, False if the input could not be parsed.
     """
     if self.reverse_order:
         filename = self.reverse_contents(filename)
     csvreader = self.get_csvreader(filename)
     if not csvreader:
         return False
     count = 0
     # bug fix: detect_columns() returns False on failure, which previously
     # crashed the tuple unpacking with a TypeError
     columns = self.detect_columns(csvreader)
     if not columns:
         return False
     (positions, balance_position) = columns
     # use a context manager so the output file handle is closed
     # deterministically instead of being leaked
     with open(target_filename, 'w') as target_filehandle:
         csvwriter = csv.writer(target_filehandle)
         csvwriter.writerow(['Date', 'Description', 'Amount', 'Balance'])
         for row in csvreader:
             count += 1
             amount = self.amount(row[positions['amount']])
             if balance_position is not None:
                 balance = row[balance_position]
             elif self.running_balance is not None:
                 # no balance column: maintain a running balance instead
                 self.running_balance += amount
                 balance = self.running_balance
             else:
                 log.error('no balance column found and no running balance given')
                 sys.exit(2)
             csvwriter.writerow(
                 [
                     row[positions['date']],
                     row[positions['desc']],
                     amount,
                     balance
                 ]
                 )
     log.info('%s CSV lines processed', count)
     return True
 def check_version(self, filename, branch, branch_base, arg_var, found_version, branch_version):
     """Compare the version found in a Dockerfile against the version derived
     from its git branch name.

     Returns True if branch_version is a prefix of found_version, False on
     mismatch (recording the failed file and branch).  Dies if
     branch_version (after stripping any jdk/jre prefix) is not a
     recognizable version token.
     """
     self.branches_dockerfile_checked.add(branch)
     self.dockerfiles_checked.add(filename)
     if arg_var:
         log.debug("found arg '%s'", arg_var)
         # mismatch messages report which ARG carried the version
         arg_version = "ARG '{0}={1}'".format(arg_var, found_version)
     else:
         arg_version = "'{0}'".format(found_version)
     log.debug("comparing '%s' contents to version derived from branch '%s' => '%s'",
               filename, branch, branch_version)
     if not isVersion(branch_version.lstrip('jdk').lstrip('jre')):
         die("unrecognized branch version '{0}' for branch_base '{1}'"
             .format(branch_version, branch_base))
     # prefix match: branch version '1.2' matches Dockerfile version '1.2.3'
     if found_version[0:len(branch_version)] == branch_version:
         log.info("{0} version '{1}' matches {2}".
                  format(self.valid_git_branches_msg, branch_version, arg_version))
     else:
         log.error("{0} version '{1}' vs Dockerfile {2}".
                   format(self.invalid_git_branches_msg, branch_version, arg_version))
         self.dockerfiles_failed += 1
         self.branches_failed.add(branch)
         return False
     return True
Example #11
0
 def detect_columns(self, csvreader):
     """Detect date / description / amount / balance column positions from
     the header row of a credit-card statement CSV.

     Prefers 'Transaction Date' over 'Posted Date' and the native-currency
     'Amount' over 'Original Amount'.  Returns (positions, balance_position)
     on success (balance_position may be None); returns False if the content
     does not look like a CSV or a required column is missing.
     """
     # bug fix: csvreader.next() is Python-2-only; the next() builtin works
     # on both Python 2 (2.6+) and Python 3, where .next() became __next__()
     headers = next(csvreader)
     if headers[0][0] == '{':
         log.error('JSON opening braces detected, not a CSV?')
         return False
     positions = {'date': None, 'desc': None, 'amount': None}
     balance_position = None
     for (position, value) in enumerate(headers):
         # want Transaction Date and not Posted Date
         if 'Date' in value and not 'Posted' in value:
             positions['date'] = position
         elif 'Merchant Name' in value:
             positions['desc'] = position
         # Original Amount column will be original currency eg 499 USD, but we only want native currency eg. 421.33
         elif 'Amount' in value and not 'Original' in value:
             positions['amount'] = position
         elif 'Balance' in value:
             balance_position = position
     for pos in positions:
         if positions[pos] is None:
             log.error('field %s not found', pos)
             return False
     # no balance column means a starting balance must be supplied
     if balance_position is None and self.running_balance is None:
         self.usage('no balance column detected, please specify --starting-balance')
     return (positions, balance_position)
Example #12
0
 def process_json(self, content, filename):
     """Validate JSON content from a file, printing it compacted on success.

     Tries strict JSON first; if --permit-single-quotes is set, falls back
     to converting single-quoted JSON (MongoDB style), then single-quoted
     JSON containing unescaped double quotes.  Returns True on success; on
     failure prints the offending content to stderr, marks the run failed
     and returns False (or exits CRITICAL unless continue_on_error is set).
     """
     log.debug('process_json()')
     if not content:
         log.warning("blank content passed to process_json for contents of file '%s'", filename)
     if isJson(content):
         print(json.dumps(json.loads(content)))
         return True
     elif self.permit_single_quotes:
         log.debug('checking for single quoted JSON')
         # check if it's regular single quoted JSON a la MongoDB
         json_single_quoted = self.convert_single_quoted(content)
         if self.process_json_single_quoted(json_single_quoted, filename):
             return True
         log.debug('single quoted JSON check failed, trying with pre-escaping double quotes')
         # check if it's single quoted JSON with double quotes that aren't escaped,
         # by pre-escaping them before converting single quotes to doubles for processing
         json_single_quoted_escaped = self.convert_single_quoted_escaped(content)
         if self.process_json_single_quoted(json_single_quoted_escaped, filename):
             log.debug("processed single quoted json with non-escaped double quotes in '%s'", filename)
             return True
         log.debug('single quoted JSON check failed even with pre-escaping any double quotes')
     # all parse attempts failed
     self.failed = True
     log.error("invalid json detected in '%s':", filename)
     printerr(content)
     if not self.continue_on_error:
         sys.exit(ERRORS['CRITICAL'])
     return False
 def check_git_branches_upstream(self, target):
     """Verify every branch of target's git repo tracks '<origin>/<branch>'.

     Branches may be filtered with --branch-prefix.  Sets self.status to
     'NO BRANCHES' or 'ERROR' on problems; returns nothing.
     """
     target = os.path.abspath(target)
     gitroot = find_git_root(target)
     if gitroot is None:
         die('Failed to find git root for target {0}'.format(target))
     log.debug("finding branches for target '{0}'".format(target))
     repo = git.Repo(gitroot)
     branches = repo.branches
     if self.branch_prefix is not None:
         log.debug('restricting to branches matching branch prefix')
         branches = [
             x for x in branches if self.branch_prefix.match(str(x))
         ]
         if not branches:
             log.error("No branches matching '%s' for target '%s'",
                       self.get_opt('branch_prefix'), target)
             self.status = 'NO BRANCHES'
     for branch in branches:
         # each local branch is expected to track the same-named branch on origin
         expected = '{0}/{1}'.format(self.origin, branch)
         # str() the tracking-branch object so it compares against the expected string
         tracking_branch = str(branch.tracking_branch())
         if tracking_branch == expected:
             log.info(
                 "OK: repo '{0}' branch '{1}' is tracking '{2}'".format(
                     gitroot, branch, tracking_branch))
         else:
             self.status = "ERROR"
             log.error(
                 "BAD: branch '{0}' is tracking '{1}' (expected '{2}')".
                 format(branch, tracking_branch, expected))
 def run(self):
     """Entry point: validate Dockerfiles against git branch versions.

     Validates args, compiles the optional --branch-prefix regex, checks
     each branch's Dockerfile, then logs summary statistics.  Exits WARNING
     for missing paths and CRITICAL if any Dockerfile failed validation.
     """
     if not self.args:
         self.usage('no Dockerfile / directory args given')
     args = uniq_list_ordered(self.args)
     self.branch_prefix = self.get_opt('branch_prefix')
     if self.branch_prefix is not None:
         validate_regex(self.branch_prefix, 'branch prefix')
         self.branch_prefix = re.compile(self.branch_prefix)
     # validate all args up front before doing any real work
     for arg in args:
         if not os.path.exists(arg):
             print("'%s' not found" % arg)
             sys.exit(ERRORS['WARNING'])
         if os.path.isfile(arg):
             log_option('file', arg)
         elif os.path.isdir(arg):
             log_option('directory', arg)
         else:
             die("path '%s' could not be determined as either a file or directory"
                 % arg)
     for arg in args:
         self.check_git_branches_dockerfiles(arg)
     log.info('Total Branches: %s', len(self.branches))
     log.info('Selected Branches: %s', len(self.selected_branches))
     log.info('Branches checked: %s', self.branches_checked)
     log.info('Branches with Dockerfile checked: %s',
              len(self.branches_dockerfile_checked))
     branches_skipped = len(self.branches_skipped)
     if branches_skipped > 0:
         # log.warning() replaces the deprecated log.warn() alias
         log.warning(
             '{0} branches skipped for not matching expected naming format'.
             format(branches_skipped))
     branches_not_checked = len(self.selected_branches) - len(
         self.branches_dockerfile_checked)
     # NOTE(review): '> 1' means a single unchecked branch is never
     # reported - looks like it should be '> 0'; confirm before changing
     if branches_not_checked > 1:
         log.warning(
             '{0} branches not checked (no matching Dockerfile found?)'.
             format(branches_not_checked))
         if log.isEnabledFor(logging.DEBUG):
             log.debug(
                 'Branches with no corresponding Dockerfile found:\n%s',
                 '\n'.join(
                     set(self.selected_branches) -
                     set(self.branches_dockerfile_checked)))
     log.info('{0} Dockerfiles checked'.format(len(
         self.dockerfiles_checked)))
     branches_failed = len(self.branches_failed)
     _ = '{0} Dockerfiles failed validation across {1} branches'.format(
         self.dockerfiles_failed, branches_failed)
     if branches_failed > 0:
         log.error(_)
     else:
         log.info(_)
     if self.failed:
         log.error('Dockerfile validation FAILED')
         sys.exit(ERRORS['CRITICAL'])
     log.info('Dockerfile validation SUCCEEDED')
 def run(self):
     """Entry point: check all path args for duplicate files.

     Exits 4 if any duplicates were found (with --quiet, printing only the
     sorted duplicate paths), 2 if any path could not be checked, 0 if clean.
     """
     args = self.process_args()
     self.check_args(args)
     for arg in args:
         try:
             self.check_path(arg)
         except OSError as _:
             log.error(_)
             self.failed = True
     if self.dups_by_name or \
        self.dups_by_size or \
        self.dups_by_hash or \
        self.dups_by_regex:
         if self.quiet:
             # collate every duplicate into one sorted, de-duplicated listing.
             # NOTE(review): this adds the dups_by_name KEYS (basenames), not
             # file paths - confirm that is intended
             for _ in self.dups_by_name:
                 self.dup_filepaths.add(_)
             # bug fix: .values() instead of the Python-2-only .itervalues()
             # so this also runs under Python 3 (equivalent on Python 2)
             for _ in itertools.chain.from_iterable(self.dups_by_size.values()):
                 self.dup_filepaths.add(_)
             for _ in itertools.chain.from_iterable(self.dups_by_hash.values()):
                 self.dup_filepaths.add(_)
             for _ in itertools.chain.from_iterable(self.dups_by_regex.values()):
                 self.dup_filepaths.add(_)
             for filepath in sorted(self.dup_filepaths):
                 print(filepath)
             sys.exit(4)
         print('Duplicates detected!\n')
         if self.dups_by_name:
             print('Duplicates by name:\n')
             for basename in self.dups_by_name:
                 print("--\nbasename '{0}':".format(basename))
                 for filepath in sorted(self.dups_by_name[basename]):
                     print(filepath)
         if self.dups_by_size:
             print('Duplicates by size:\n')
             for size in self.dups_by_size:
                 print("--\nsize '{0}' bytes:".format(size))
                 for filepath in sorted(self.dups_by_size[size]):
                     print(filepath)
         if self.dups_by_hash:
             print('Duplicates by checksum:\n')
             for checksum in self.dups_by_hash:
                 print("--\nchecksum '{0}':".format(checksum))
                 for filepath in sorted(self.dups_by_hash[checksum]):
                     print(filepath)
         if self.dups_by_regex:
             print('Duplicates by regex match ({0}):\n'.format(self.regex))
             for matching_portion in self.dups_by_regex:
                 print("--\nregex matching portion '{0}':".format(matching_portion))
                 for filepath in sorted(self.dups_by_regex[matching_portion]):
                     print(filepath)
         sys.exit(4)
     elif self.failed:
         sys.exit(2)
     else:
         print('No Duplicates Found')
         sys.exit(0)
 def run(self):
     """Entry point: walk all path args looking for duplicate files.

     Exit codes: 4 when duplicates exist (quiet mode prints just the sorted
     duplicate paths), 2 when any path failed to be checked, 0 otherwise.
     """
     args = self.process_args()
     self.check_args(args)
     for arg in args:
         try:
             self.check_path(arg)
         except OSError as _:
             log.error(_)
             self.failed = True
     if self.dups_by_name or \
        self.dups_by_size or \
        self.dups_by_hash or \
        self.dups_by_regex:
         if self.quiet:
             # collate every duplicate into one sorted, de-duplicated listing.
             # NOTE(review): this adds the dups_by_name KEYS (basenames), not
             # file paths - confirm that is intended
             for _ in self.dups_by_name:
                 self.dup_filepaths.add(_)
             # bug fix: .values() instead of the Python-2-only .itervalues()
             # so this also runs under Python 3 (equivalent on Python 2)
             for _ in itertools.chain.from_iterable(self.dups_by_size.values()):
                 self.dup_filepaths.add(_)
             for _ in itertools.chain.from_iterable(self.dups_by_hash.values()):
                 self.dup_filepaths.add(_)
             for _ in itertools.chain.from_iterable(self.dups_by_regex.values()):
                 self.dup_filepaths.add(_)
             for filepath in sorted(self.dup_filepaths):
                 print(filepath)
             sys.exit(4)
         print('Duplicates detected!\n')
         if self.dups_by_name:
             print('Duplicates by name:\n')
             for basename in self.dups_by_name:
                 print("--\nbasename '{0}':".format(basename))
                 for filepath in sorted(self.dups_by_name[basename]):
                     print(filepath)
         if self.dups_by_size:
             print('Duplicates by size:\n')
             for size in self.dups_by_size:
                 print("--\nsize '{0}' bytes:".format(size))
                 for filepath in sorted(self.dups_by_size[size]):
                     print(filepath)
         if self.dups_by_hash:
             print('Duplicates by checksum:\n')
             for checksum in self.dups_by_hash:
                 print("--\nchecksum '{0}':".format(checksum))
                 for filepath in sorted(self.dups_by_hash[checksum]):
                     print(filepath)
         if self.dups_by_regex:
             print('Duplicates by regex match ({0}):\n'.format(self.regex))
             for matching_portion in self.dups_by_regex:
                 print("--\nregex matching portion '{0}':".format(matching_portion))
                 for filepath in sorted(self.dups_by_regex[matching_portion]):
                     print(filepath)
         sys.exit(4)
     elif self.failed:
         sys.exit(2)
     else:
         print('No Duplicates Found')
         sys.exit(0)
Example #17
0
 def run(self):
     """Convert every CSV file given in self.args, logging per-file results
     and the final running balance."""
     if not self.args:
         self.usage('no file arguments specified')
     for source in self.args:
         # output name: strip any trailing .csv and append _crunch.csv
         stem = re.sub(r'\.csv$', '', source)
         destination = '{}_crunch.csv'.format(stem)
         log.info("converting file '%s' => '%s'", source, destination)
         converted = self.convert(source, destination)
         if converted:
             log.info("converted '%s' => '%s'", source, destination)
         else:
             log.error("FAILED to convert filename '%s'", source)
     log.info('Final Balance: {}'.format(self.running_balance))
Example #18
0
 def process_options(self):
     """Parse command-line options for the statement converter.

     Validates --starting-balance as a Decimal when given, exiting 1 on
     unparseable input.
     """
     super(CrunchAccountingCsvStatementConverter, self).process_options()
     self.credit_card = self.get_opt('credit_card')
     self.reverse_order = self.get_opt('reverse_order')
     self.running_balance = self.get_opt('starting_balance')
     if self.running_balance is not None:
         try:
             self.running_balance = Decimal(self.running_balance)
         # bug fix: Decimal() raises decimal.InvalidOperation (a subclass of
         # ArithmeticError), not ValueError, on unparseable input - the old
         # 'except ValueError' never caught anything
         except (ValueError, ArithmeticError) as _:
             log.error('INVALID starting balance %s, must be in a decimal number: %s', self.running_balance, _)
             sys.exit(1)
 def execute(conn, database, table, query):
     """Run one query against a table and print results as tab-separated rows
     prefixed with 'db.table'.

     NOTE(review): defined without 'self' despite sitting among methods —
     presumably used as a static helper; confirm how it is called.
     Queries that return no result set (eg. COMPUTE STATS) raise a specific
     ProgrammingError which is logged and swallowed; anything else re-raises.
     """
     try:
         log.info(' %s.%s - running %s', database, table, query)
         with conn.cursor() as query_cursor:
             # doesn't support parameterized query quoting from dbapi spec
             query_cursor.execute(query)
             for result in query_cursor:
                 print('{db}.{table}\t{result}'.format(db=database, table=table, \
                                                       result='\t'.join([str(_) for _ in result])))
     except impala.error.ProgrammingError as _:
         log.error(_)
         # COMPUTE STATS returns no results
         if 'Trying to fetch results on an operation with no results' not in str(_):
             raise
 def is_file_dup_by_regex(self, filepath):
     """Return True if another file already matched the same regex capture as
     filepath, recording both under that capture in self.dups_by_regex.

     The first file seen for each capture value is remembered in
     self.regex_captures; only a second sighting counts as a duplicate.
     """
     match = re.search(self.regex, filepath)
     if not match:
         return False
     log.debug("regex matched file '%s'", filepath)
     if not match.groups():
         log.error('no capture detected! Did you forget to specify the (brackets) to capture in the regex?')
         return False
     capture = match.group(1)
     if capture not in self.regex_captures:
         # first sighting of this capture value - remember where we saw it
         self.regex_captures[capture] = filepath
         return False
     # duplicate capture: record both the original path and this one
     duplicates = self.dups_by_regex.setdefault(capture, set())
     duplicates.add(self.regex_captures[capture])
     duplicates.add(filepath)
     return True
 def is_file_dup_by_regex(self, filepath):
     """Return True if another file already matched the same regex capture
     as filepath, recording both files as duplicates under that capture.

     The first file seen for each capture value is remembered in
     self.regex_captures; subsequent matches populate self.dups_by_regex.
     """
     match = re.search(self.regex, filepath)
     if match:
         log.debug("regex matched file '%s'", filepath)
         if match.groups():
             capture = match.group(1)
             if capture in self.regex_captures:
                 # second file with this capture => both files are duplicates
                 self.dups_by_regex[capture] = self.dups_by_regex.get(capture, set())
                 self.dups_by_regex[capture].add(self.regex_captures[capture])
                 self.dups_by_regex[capture].add(filepath)
                 return True
             else:
                 self.regex_captures[capture] = filepath
         else:
             log.error('no capture detected! Did you forget to specify the (brackets) to capture in the regex?')
     return False
 def check_git_branches_upstream(self, target):
     """Verify every branch tracks '<origin>/<branch>', optionally fixing it.

     With --fix, branches that track nothing are set to track the expected
     remote branch; with --force-fix, the tracking branch is reset
     unconditionally.  Remaining mismatches set self.status to 'ERROR'.
     """
     target = os.path.abspath(target)
     gitroot = find_git_root(target)
     if gitroot is None:
         die('Failed to find git root for target {0}'.format(target))
     log.debug("finding branches for target '{0}'".format(target))
     repo = git.Repo(gitroot)
     branches = repo.branches
     if self.branch_prefix is not None:
         log.debug('restricting to branches matching branch prefix')
         branches = [
             x for x in branches if self.branch_prefix.match(str(x))
         ]
         if not branches:
             log.error("No branches matching '%s' for target '%s'",
                       self.get_opt('branch_prefix'), target)
             self.status = 'NO BRANCHES'
     for branch in branches:
         expected = '{0}/{1}'.format(self.origin, branch)
         # have to str() this as it returns an object that will fail equality match otherwise
         tracking_branch = str(branch.tracking_branch())
         if tracking_branch == expected:
             log.info(
                 "OK: repo '{0}' branch '{1}' is tracking '{2}'".format(
                     gitroot, branch, tracking_branch))
         elif self.get_opt('fix') and tracking_branch == 'None':
             # 'None' here is str(None): the branch has no tracking branch configured
             log.warn(
                 "WARN: setting repo '{0}' unconfigured branch '{1}' to track '{2}'"
                 .format(gitroot, branch, expected))
             branch.set_tracking_branch(
                 git.refs.remote.RemoteReference(repo, 'refs/remotes/' +
                                                 expected))
         elif self.get_opt('force_fix'):
             log.warn(
                 "WARN: forcibly resetting repo '{0}' branch '{1}' to track '{2}'"
                 .format(gitroot, branch, expected))
             branch.set_tracking_branch(
                 git.refs.remote.RemoteReference(repo, 'refs/remotes/' +
                                                 expected))
         else:
             self.status = "ERROR"
             log.error(
                 "BAD: branch '{0}' is tracking '{1}' (expected '{2}')".
                 format(branch, tracking_branch, expected))
Example #23
0
 def process_database(self, database, table_regex):
     """Run the configured query against every table in a database whose name
     matches table_regex.

     Authorization errors listing the database's tables are logged and the
     database skipped; per-table query errors are skipped only when
     self.ignore_errors is set, otherwise they re-raise.
     """
     tables = []
     table_count = 0
     log.info("querying tables for database '%s'", database)
     conn = self.connect(database)
     with conn.cursor() as table_cursor:
         try:
             # doesn't support parameterized query quoting from dbapi spec,
             # so backtick-quote identifiers instead
             table_cursor.execute('use `{}`'.format(database))
             table_cursor.execute('show tables')
         except impala.error.HiveServer2Error as _:
             log.error("error querying tables for database '%s': %s",
                       database, _)
             if 'AuthorizationException' in str(_):
                 return
             raise
         for table_row in table_cursor:
             table = table_row[0]
             table_count += 1
             if not table_regex.search(table):
                 log.debug("skipping database '%s' table '%s', does not match regex '%s'", \
                           database, table, self.table)
                 continue
             tables.append(table)
     log.info("%s/%s tables selected for database '%s'", len(tables),
              table_count, database)
     for table in tables:
         try:
             query = self.query.format(db='`{}`'.format(database),
                                       table='`{}`'.format(table))
         except KeyError as _:
             # bug fix: a KeyError instance never compares equal to the
             # missing key string, so the old "_ == 'db'" test was always
             # False and the db-less fallback was unreachable; compare the
             # exception's args instead
             if _.args and _.args[0] == 'db':
                 query = self.query.format(table='`{}`'.format(table))
             else:
                 raise
         try:
             self.execute(conn, database, table, query)
             self.table_count += 1
         except Exception as _:
             if self.ignore_errors:
                 log.error("database '%s' table '%s':  %s", database, table,
                           _)
                 continue
             raise
 def run(self):
     """Validate Dockerfile ARG versions against Git branch names for each
     file/directory arg, log summary counts, and exit CRITICAL on failure.

     Exits WARNING if any arg path does not exist.
     """
     if not self.args:
         self.usage('no Dockerfile / directory args given')
     args = uniq_list_ordered(self.args)
     self.branch_prefix = self.get_opt('branch_prefix')
     if self.branch_prefix is not None:
         validate_regex(self.branch_prefix, 'branch prefix')
         self.branch_prefix = re.compile(self.branch_prefix)
     # validate all args up front before doing any checking work
     for arg in args:
         if not os.path.exists(arg):
             print("'%s' not found" % arg)
             sys.exit(ERRORS['WARNING'])
         if os.path.isfile(arg):
             log_option('file', arg)
         elif os.path.isdir(arg):
             log_option('directory', arg)
         else:
             die("path '%s' could not be determined as either a file or directory" % arg)
     for arg in args:
         self.check_git_branches_dockerfiles(arg)
     log.info('Total Branches: %s', len(self.branches))
     log.info('Selected Branches: %s', len(self.selected_branches))
     log.info('Branches checked: %s', self.branches_checked)
     log.info('Branches with Dockerfile checked: %s', len(self.branches_dockerfile_checked))
     branches_skipped = len(self.branches_skipped)
     if branches_skipped > 0:
         # log.warn is a deprecated alias - use log.warning
         log.warning('{0} branches skipped for not matching expected naming format'
                     .format(branches_skipped))
     branches_not_checked = len(self.selected_branches) - len(self.branches_dockerfile_checked)
     # NOTE(review): threshold is > 1 rather than > 0 - presumably deliberate
     # tolerance of a single unmatched branch, confirm intent
     if branches_not_checked > 1:
         log.warning('{0} branches not checked (no matching Dockerfile found?)'.format(branches_not_checked))
         if log.isEnabledFor(logging.DEBUG):
             log.debug('Branches with no corresponding Dockerfile found:\n%s',
                       '\n'.join(set(self.selected_branches) - set(self.branches_dockerfile_checked)))
     log.info('{0} Dockerfiles checked'.format(len(self.dockerfiles_checked)))
     branches_failed = len(self.branches_failed)
     _ = '{0} Dockerfiles failed validation across {1} branches'.format(self.dockerfiles_failed, branches_failed)
     if branches_failed > 0:
         log.error(_)
     else:
         log.info(_)
     if self.failed:
         log.error('Dockerfile validation FAILED')
         sys.exit(ERRORS['CRITICAL'])
     log.info('Dockerfile validation SUCCEEDED')
Exemple #25
0
 def check_dockerfile_arg(self, filename, branch):
     """Compare each ARG *_VERSION found in the Dockerfile against the version
     numbers embedded in the branch name, matched positionally in order of
     appearance.

     Returns True when every compared ARG matches (or there are more ARGs
     than branch versions); returns False on the first mismatch, recording
     the failure counters.
     """
     log.debug('check_dockerfile_arg({0}, {1})'.format(filename, branch))
     branch_base = str(branch).replace('-dev', '')
     (branch_base, branch_versions) = self.branch_version(branch)
     with open(filename) as filehandle:
         version_index = 0
         for line in filehandle:
             argversion = self.arg_regex.match(line.strip())
             if not argversion:
                 continue
             self.dockerfiles_checked.add(filename)
             log.debug("found arg '%s'", argversion.group(0))
             arg_var = argversion.group(1)
             # ARGs are matched to branch versions by position rather than by
             # name - name-based matching proved too restrictive for more
             # complex naming conventions (kafka, centos-java/scala etc)
             if version_index >= len(branch_versions):
                 return True
             branch_version = branch_versions[version_index]
             log.debug(
                 "comparing '%s' contents to version derived from branch '%s' => '%s'",
                 filename, branch, branch_version)
             if not isVersion(branch_version):
                 die("unrecognized branch version '{0}' for branch_base '{1}'"
                     .format(branch_version, branch_base))
             found_version = argversion.group(2)
             # prefix comparison lets a short branch version match a longer ARG
             # version, eg. branch '2.3' vs ARG '2.3.1'
             if found_version[0:len(branch_version)] != branch_version:
                 log.error(
                     '{0} version {1} vs Dockerfile ARG {2}={3}'.format(
                         self.invalid_git_branches_msg, branch_version,
                         arg_var, found_version))
                 self.dockerfiles_failed += 1
                 self.branches_failed.add(branch)
                 return False
             log.info("{0} version '{1}' matches {2}={3}".format(
                 self.valid_git_branches_msg, branch_version,
                 arg_var, found_version))
             version_index += 1
     return True
Exemple #26
0
 def get_csvreader(filename):
     """Open filename and return a csv.reader using a sniffed strict dialect,
     validated and rewound to the start of the file.

     Returns None if the file cannot be opened, the CSV dialect cannot be
     determined, or downstream validation rejects the reader.
     """
     try:
         filehandle = open(filename)
     except IOError as _:
         log.error(_)
         return None
     filename = os.path.basename(filename)
     try:
         # sniff the dialect from a leading sample of the file
         dialect = csv.Sniffer().sniff(filehandle.read(1024))
         # this will raise an Error if invalid
         dialect.strict = True
         filehandle.seek(0)
         csvreader = csv.reader(filehandle, dialect)
     except csv.Error as _:
         log.warning('file %s: %s', filename, _)
         # BUGFIX: close the handle - it was leaked on this early return,
         # since the caller never receives a reader bound to it
         filehandle.close()
         return None
     csvreader = CrunchAccountingCsvStatementConverter.validate_csvreader(csvreader, filename)
     filehandle.seek(0)
     return csvreader
 def check_dockerfile_arg(self, filename, tag):
     """Check the Dockerfile's ARG matching the tag's base name against the
     version embedded in the tag (eg. tag 'solr-4.10' vs ARG SOLR_VERSION).

     Returns True if the first matching ARG's version prefix-matches the tag
     version, or if no ARG with a matching name is found; False on mismatch.
     """
     log.debug("check_dockerfile_arg({0}, {1})".format(filename, tag))
     # strip '-dev' suffix, then split the trailing version off the tag name
     tag_base = str(tag).replace("-dev", "")
     (tag_base, tag_version) = tag_base.rsplit("-", 1)
     log.debug("tag_base = {0}".format(tag_base))
     log.debug("tag_version = {0}".format(tag_version))
     with open(filename) as filehandle:
         for line in filehandle:
             # log.debug(line.strip())
             argversion = self.arg_regex.match(line.strip())
             if argversion:
                 log.debug("found arg '%s'", argversion.group(0))
                 log.debug("checking arg group 1 '%s' == tag_base '%s'", argversion.group(1), tag_base)
                 # ARG name must equal the tag base, case-insensitively, with
                 # '-' normalized to '_' (eg. 'centos-java' -> CENTOS_JAVA)
                 if argversion.group(1).lower() == tag_base.lower().replace("-", "_"):
                     log.debug("arg '%s'  matches tag base '%s'", argversion.group(1), tag_base)
                     log.debug(
                         "comparing '%s' contents to version derived from tag '%s' => '%s'",
                         filename,
                         tag,
                         tag_version,
                     )
                     if not isVersion(tag_version):
                         die("unrecognized tag version '{0}' for tag_base '{1}'".format(tag_version, tag_base))
                     found_version = argversion.group(2)
                     # prefix match lets tag version '2.3' match ARG '2.3.1'
                     # if tag_version == found_version or tag_version == found_version.split('.', 2)[0]:
                     if found_version[0 : len(tag_version)] == tag_version:
                         log.info(
                             "{0} (tag version '{1}' matches arg version '{2}')".format(
                                 self.valid_git_tags_msg, tag_version, found_version
                             )
                         )
                         return True
                     else:
                         log.error(
                             "{0} ({1} tag vs {2} Dockerfile ARG)".format(
                                 self.invalid_git_tags_msg, tag_version, found_version
                             )
                         )
                         return False
     # no matching ARG found - treated as a pass
     return True
 def check_dockerfile_arg(self, filename, branch):
     """Compare ARG *_VERSION values in the Dockerfile against the version
     numbers embedded in the branch name, matched positionally in order of
     appearance.

     Returns True when all compared ARGs match (or there are more ARGs than
     branch versions); False on the first mismatch, after recording failure
     counters.
     """
     log.debug('check_dockerfile_arg({0}, {1})'.format(filename, branch))
     branch_base = str(branch).replace('-dev', '')
     (branch_base, branch_versions) = self.branch_version(branch)
     with open(filename) as filehandle:
         # position into branch_versions - advanced once per matched ARG
         version_index = 0
         for line in filehandle:
             #log.debug(line.strip())
             argversion = self.arg_regex.match(line.strip())
             if argversion:
                 self.dockerfiles_checked.add(filename)
                 log.debug("found arg '%s'", argversion.group(0))
                 arg_var = argversion.group(1)
                 # this is too restrictive and prevents finding a lot of issues with
                 # more complex naming conventions for kafka, centos-java/scala etc
                 # instead we now expect ARG *_VERSION to be in the same order as the version numbers in branch name
                 #log.debug("checking arg group 1 '%s' == branch_base '%s'", argversion.group(1), branch_base)
                 #if self.normalize_name(arg_var) == self.normalize_name(branch_base).replace('-', '_'):
                 if version_index >= len(branch_versions):
                     # more ARGs than branch versions - nothing left to compare
                     return True
                 branch_version = branch_versions[version_index]
                 #log.debug("arg '%s' matches branch base '%s'", argversion.group(1), branch_base)
                 log.debug("comparing '%s' contents to version derived from branch '%s' => '%s'",
                           filename, branch, branch_version)
                 if not isVersion(branch_version):
                     die("unrecognized branch version '{0}' for branch_base '{1}'"
                         .format(branch_version, branch_base))
                 found_version = argversion.group(2)
                 # prefix match lets branch version '2.3' match ARG '2.3.1'
                 #if branch_version == found_version or branch_version == found_version.split('.', 2)[0]:
                 if found_version[0:len(branch_version)] == branch_version:
                     log.info("{0} version '{1}' matches {2}={3}".
                              format(self.valid_git_branches_msg, branch_version, arg_var, found_version))
                 else:
                     log.error('{0} version {1} vs Dockerfile ARG {2}={3}'.
                               format(self.invalid_git_branches_msg, branch_version, arg_var, found_version))
                     self.dockerfiles_failed += 1
                     self.branches_failed.add(branch)
                     return False
                 version_index += 1
     return True
Exemple #29
0
 def process_database(self, database):
     """Enumerate every table in the given database and run process_table()
     on each, continuing past per-table HiveServer2 errors only when
     self.ignore_errors is set."""
     log.info("querying tables for database '%s'", database)
     with self.conn.cursor() as table_cursor:
         # doesn't support parameterized query quoting from dbapi spec
         #table_cursor.execute('use %(database)s', {'database': database})
         table_cursor.execute('use `{}`'.format(database))
         table_cursor.execute('show tables')
         tables = [table_row[0] for table_row in table_cursor]
     self.table_count += len(tables)
     log.info("found %s tables in database '%s'", len(tables), database)
     for table in tables:
         try:
             self.process_table(database, table)
         except impala.error.HiveServer2Error as _:
             if not self.ignore_errors:
                 raise
             log.error("database '%s' table '%s':  %s", database, table,
                       _)
Exemple #30
0
 def run(self):
     """Validate Dockerfile ARG versions against Git branch names for each
     file/directory arg, log summary counts, and exit CRITICAL on failure.

     Exits WARNING if any arg path does not exist.
     """
     if not self.args:
         self.usage('no Dockerfile / directory args given')
     args = uniq_list_ordered(self.args)
     self.branch_prefix = self.get_opt('branch_prefix')
     if self.branch_prefix is not None:
         validate_regex(self.branch_prefix, 'branch prefix')
         self.branch_prefix = re.compile(self.branch_prefix)
     # validate all args up front before doing any checking work
     for arg in args:
         if not os.path.exists(arg):
             print("'%s' not found" % arg)
             sys.exit(ERRORS['WARNING'])
         if os.path.isfile(arg):
             log_option('file', arg)
         elif os.path.isdir(arg):
             log_option('directory', arg)
         else:
             die("path '%s' could not be determined as either a file or directory"
                 % arg)
     for arg in args:
         self.check_git_branches_dockerfiles(arg)
     branches_skipped = len(self.branches_skipped)
     if branches_skipped > 0:
         # log.warn is a deprecated alias - use log.warning
         log.warning(
             '{0} branches skipped for not matching expected naming format'.
             format(branches_skipped))
     log.info('{0} Dockerfiles checked across {1} branches'.format(
         len(self.dockerfiles_checked), self.branches_checked))
     branches_failed = len(self.branches_failed)
     _ = '{0} Dockerfiles failed validation across {1} branches'.format(
         self.dockerfiles_failed, branches_failed)
     if branches_failed > 0:
         log.error(_)
     else:
         log.info(_)
     if self.failed:
         log.error('Dockerfile validation FAILED')
         sys.exit(ERRORS['CRITICAL'])
     log.info('Dockerfile validation SUCCEEDED')
 def process_database(self, conn, database, table_regex, partition_regex):
     """Collect row counts for every table in the given database whose name
     matches table_regex, via get_row_counts().

     :param conn: open database connection providing cursor()
     :param database: database name to iterate (backtick-quoted in SQL)
     :param table_regex: compiled regex - tables not matching it are skipped
     :param partition_regex: passed through to get_row_counts()

     Returns early (silently) on AuthorizationException; re-raises any other
     HiveServer2Error. Per-table errors are logged and skipped only when
     self.ignore_errors is set.
     """
     tables = []
     table_count = 0
     log.info("querying tables for database '%s'", database)
     with conn.cursor() as table_cursor:
         try:
             # doesn't support parameterized query quoting from dbapi spec
             #table_cursor.execute('use %(database)s', {'database': database})
             table_cursor.execute('use `{}`'.format(database))
             table_cursor.execute('show tables')
         except impala.error.HiveServer2Error as _:
             log.error(_)
             if 'AuthorizationException' in str(_):
                 return
             raise
         for table_row in table_cursor:
             table = table_row[0]
             table_count += 1
             if not table_regex.search(table):
                 # NOTE(review): logs self.table (presumably the original regex
                 # string table_regex was compiled from) - confirm they match
                 log.debug("skipping database '%s' table '%s', does not match regex '%s'", \
                           database, table, self.table)
                 continue
             tables.append(table)
     log.info("%s/%s tables selected for database '%s'", len(tables),
              table_count, database)
     for table in tables:
         try:
             self.get_row_counts(conn, database, table, partition_regex)
             self.table_count += 1
         except Exception as _:
             # invalid query handle and similar errors happen at higher level
             # as they are not query specific, will not be caught here so still error out
             if self.ignore_errors:
                 log.error("database '%s' table '%s':  %s", database, table,
                           _)
                 continue
             raise
 def run(self):
     """Validate Dockerfiles against Git tags for each file/directory arg,
     exiting CRITICAL if any validation failed (WARNING if an arg path does
     not exist)."""
     if not self.args:
         self.usage("no Dockerfile / directory args given")
     args = uniq_list_ordered(self.args)
     self.tag_prefix = self.get_opt("tag_prefix")
     if self.tag_prefix is not None:
         validate_regex(self.tag_prefix, "tag prefix")
         self.tag_prefix = re.compile(self.tag_prefix)
     # validate every arg up front before doing any checking work
     for arg in args:
         if os.path.isfile(arg):
             log_option("file", arg)
         elif os.path.isdir(arg):
             log_option("directory", arg)
         elif not os.path.exists(arg):
             print("'%s' not found" % arg)
             sys.exit(ERRORS["WARNING"])
         else:
             die("path '%s' could not be determined as either a file or directory" % arg)
     for arg in args:
         self.check_git_tags_dockerfiles(arg)
     if self.failed:
         log.error("Dockerfile validation FAILED")
         sys.exit(ERRORS["CRITICAL"])
     log.info("Dockerfile validation SUCCEEDED")
 def run(self):
     """Validate Dockerfile ARG versions against Git branch names for each
     file/directory arg, log summary counts, and exit CRITICAL on failure.

     Exits WARNING if any arg path does not exist.
     """
     if not self.args:
         self.usage('no Dockerfile / directory args given')
     args = uniq_list_ordered(self.args)
     self.branch_prefix = self.get_opt('branch_prefix')
     if self.branch_prefix is not None:
         validate_regex(self.branch_prefix, 'branch prefix')
         self.branch_prefix = re.compile(self.branch_prefix)
     # validate all args up front before doing any checking work
     for arg in args:
         if not os.path.exists(arg):
             print("'%s' not found" % arg)
             sys.exit(ERRORS['WARNING'])
         if os.path.isfile(arg):
             log_option('file', arg)
         elif os.path.isdir(arg):
             log_option('directory', arg)
         else:
             die("path '%s' could not be determined as either a file or directory" % arg)
     for arg in args:
         self.check_git_branches_dockerfiles(arg)
     branches_skipped = len(self.branches_skipped)
     if branches_skipped > 0:
         # log.warn is a deprecated alias - use log.warning
         log.warning('{0} branches skipped for not matching expected naming format'
                     .format(branches_skipped))
     log.info('{0} Dockerfiles checked across {1} branches'
              .format(len(self.dockerfiles_checked), self.branches_checked))
     branches_failed = len(self.branches_failed)
     _ = '{0} Dockerfiles failed validation across {1} branches'.format(self.dockerfiles_failed, branches_failed)
     if branches_failed > 0:
         log.error(_)
     else:
         log.info(_)
     if self.failed:
         log.error('Dockerfile validation FAILED')
         sys.exit(ERRORS['CRITICAL'])
     log.info('Dockerfile validation SUCCEEDED')
 def check_git_branches_upstream(self, target):
     target = os.path.abspath(target)
     gitroot = find_git_root(target)
     if gitroot is None:
         die('Failed to find git root for target {0}'.format(target))
     log.debug("finding branches for target '{0}'".format(target))
     repo = git.Repo(gitroot)
     branches = repo.branches
     if self.branch_prefix is not None:
         log.debug('restricting to branches matching branch prefix')
         branches = [x for x in branches if self.branch_prefix.match(str(x))]
         if not branches:
             log.error("No branches matching '%s' for target '%s'", self.get_opt('branch_prefix'), target)
             self.status = 'NO BRANCHES'
     #if log.isEnabledFor(logging.DEBUG):
     #log.debug('\n\nbranches for target %s:\n\n%s\n', target, '\n'.join(list(branches)))
     for branch in branches:
         expected = '{0}/{1}'.format(self.origin, branch)
         # have to str() this as it returns an object that will fail equality match otherwise
         tracking_branch = str(branch.tracking_branch())
         if tracking_branch == expected:
             log.info("OK: repo '{0}' branch '{1}' is tracking '{2}'"
                      .format(gitroot, branch, tracking_branch))
         elif self.get_opt('fix') and tracking_branch == 'None':
             log.warn("WARN: setting repo '{0}' unconfigured branch '{1}' to track '{2}'"
                      .format(gitroot, branch, expected))
             #print(list(repo.remotes.origin.refs))
             branch.set_tracking_branch(git.refs.remote.RemoteReference(repo, 'refs/remotes/' + expected))
         elif self.get_opt('force_fix'):
             log.warn("WARN: forcibly resetting repo '{0}' branch '{1}' to track '{2}'"
                      .format(gitroot, branch, expected))
             branch.set_tracking_branch(git.refs.remote.RemoteReference(repo, 'refs/remotes/' + expected))
         else:
             self.status = "ERROR"
             log.error("BAD: branch '{0}' is tracking '{1}' (expected '{2}')"
                       .format(branch, tracking_branch, expected))