def create_cluster(self, cluster, filename, blueprint=''):
    # log.debug('create_cluster(%s, %s)' % (filename, name))
    validate_file(filename, 'cluster hosts mapping', nolog=True)
    try:
        _ = open(str(filename))
        file_data = _.read()
    except IOError as _:
        err = "failed to read Ambari cluster host mapping from file '%s': %s" % (filename, _)
        # log.critical(err)
        qquit('CRITICAL', err)
    log.info("creating cluster '%s' using file '%s'" % (cluster, filename))
    if not isJson(file_data):
        qquit('CRITICAL', "invalid json found in file '%s'" % filename)
    # don't have access to a blueprint name to enforce reset here
    # json_data = json.loads(file_data)
    # try:
    #     json_data['Blueprints']['blueprint_name'] = blueprint
    # except KeyError, e:
    #     qquit('CRITICAL', 'failed to (re)set blueprint name in cluster/hostmapping data before creating cluster')
    if blueprint:
        try:
            log.info("setting blueprint in cluster creation to '%s'" % blueprint)
            json_data = json.loads(file_data)
            json_data['blueprint'] = blueprint
            file_data = json.dumps(json_data)
        except KeyError as _:
            log.warn("failed to inject blueprint name '%s' in to cluster creation" % blueprint)
    response = self.send('clusters/%s' % cluster, file_data)
    log.info("Cluster creation submitted, see Ambari web UI to track progress")
    return response
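# Abridged sketch of the cluster host-mapping JSON create_cluster() expects
# (values illustrative - see the Ambari blueprints documentation for the full format):
#
#   {
#       "blueprint": "my-blueprint",
#       "default_password": "changeme",
#       "host_groups": [
#           {"name": "host_group_1", "hosts": [{"fqdn": "node1.example.com"}]}
#       ]
#   }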
def process_perfdata(self):
    perfdata_raw = None
    if '|' in self.message:
        self.message, perfdata_raw = self.message.split('|', 1)
    if perfdata_raw:
        log.debug("raw perfdata: %s", perfdata_raw)
        for item in perfdata_raw.split():
            if '=' in item:
                header, data = item.split('=', 1)
                data = data.split(';')[0]
                match = self.perfdata_regex.search(data)
                if match:
                    val = match.group(1)
                    log.debug("found numeric value '%s' in item '%s'", val, item)
                    if match.group(2):
                        units = match.group(2)
                        log.debug("found units '%s' in item '%s'", units, item)
                        header += " ({0})".format(units)
                    header = header.strip('"')
                    header = header.strip("'")
                    header = header.replace(self.separator, '_')
                    self.headers += [header.upper()]
                    self.perfdata += [val]
                else:
                    log.warn("no valid numeric value to extract found in perfdata item '%s'", item)
            else:
                log.warn("no key=value format detected in item '%s'", item)
    self.message = self.message.strip()
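# Standalone, runnable sketch of the same perfdata-splitting technique as
# process_perfdata() above; the regex here is an assumption, not necessarily
# the class's actual perfdata_regex.
import re

_PERFDATA_REGEX = re.compile(r'^(\d+(?:\.\d+)?)([A-Za-z%]*)$')

def split_perfdata(message):
    """Split a Nagios-style 'message|perfdata' line into (message, [(label, value, units)])."""
    perfdata = []
    if '|' in message:
        message, perfdata_raw = message.split('|', 1)
        for item in perfdata_raw.split():
            if '=' not in item:
                continue
            label, data = item.split('=', 1)
            # ';' separates the value from warn/crit/min/max thresholds
            match = _PERFDATA_REGEX.match(data.split(';')[0])
            if match:
                perfdata.append((label.strip('"\''), match.group(1), match.group(2)))
    return message.strip(), perfdata

# split_perfdata('DISK OK|/=2643MB;5948;5958;0;5968')
# -> ('DISK OK', [('/', '2643', 'MB')])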
def __parse_args__(self):
    try:
        (self.options, self.args) = self.__parser.parse_args()
    # I don't agree with zero exit code from OptionParser for help/usage,
    # and want UNKNOWN not CRITICAL(2) for switch mis-usage...
    except SystemExit:  # pragma: no cover
        sys.exit(ERRORS['UNKNOWN'])
    if self.options.help:  # pragma: no cover
        self.usage()
    if self.options.version:  # pragma: no cover
        print('%(version)s' % self.__dict__)
        sys.exit(ERRORS['UNKNOWN'])
    if 'timeout' in dir(self.options):
        self.timeout = self.get_opt('timeout')
    env_verbose = os.getenv('VERBOSE')
    if isInt(env_verbose):
        env_verbose = int(env_verbose)  # cast before comparing - os.getenv() returns a string
        if env_verbose > self.verbose:
            log.debug('environment variable $VERBOSE = %s, increasing verbosity', env_verbose)
            self.verbose = env_verbose
    elif env_verbose is None:
        pass
    else:
        log.warn("$VERBOSE environment variable is not an integer ('%s')", env_verbose)
    self.parse_args()
    return self.options, self.args
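# Minimal standalone sketch of the $VERBOSE override behaviour above (the
# helper name is hypothetical, not part of the class):
import os

def effective_verbosity(cli_verbose):
    env_verbose = os.getenv('VERBOSE')
    if env_verbose is None:
        return cli_verbose
    try:
        return max(cli_verbose, int(env_verbose))  # environment can only raise verbosity
    except ValueError:
        return cli_verbose  # non-integer $VERBOSE is ignored (the class logs a warning)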
def run(self):
    json_file = self.get_opt('json')
    parquet_dir = self.get_opt('parquet_dir')
    # let Spark fail if json/parquet aren't available
    # can't check paths exist as want to remain generically portable
    # to HDFS, local filesystem or any other uri scheme Spark supports
    log.info("Json Source: %s" % json_file)
    log.info("Parquet Destination: %s" % parquet_dir)
    conf = SparkConf().setAppName('HS PySpark JSON => Parquet')
    sc = SparkContext(conf=conf)  # pylint: disable=invalid-name
    if self.verbose < 3 and 'setLogLevel' in dir(sc):
        sc.setLogLevel('WARN')
    sqlContext = SQLContext(sc)  # pylint: disable=invalid-name
    spark_version = sc.version
    log.info('Spark version detected as %s' % spark_version)
    if not isVersionLax(spark_version):
        die("Spark version couldn't be determined. " + support_msg('pytools'))
    if isMinVersion(spark_version, 1.4):
        df = sqlContext.read.json(json_file)  # pylint: disable=invalid-name
        df.write.parquet(parquet_dir)
    else:
        log.warn('running legacy code for Spark <= 1.3')
        df = sqlContext.jsonFile(json_file)  # pylint: disable=invalid-name
        df.saveAsParquetFile(parquet_dir)
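# Hypothetical invocation of a script built around this run() method (script
# name, paths and option names are placeholders):
#
#   spark-submit --master 'local[4]' spark_json_to_parquet.py \
#       --json data.json --parquet-dir data.parquet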
def process_event_selectors(self, client, trail_list):
    total_event_selectors = 0
    num_management = 0
    num_readwrite_all = 0
    trails_without_selectors = 0
    found = False
    for trail in trail_list:
        name = trail['Name']
        if self.trail_name and self.trail_name != name:
            continue
        found = True
        trail_info = client.get_event_selectors(TrailName=name)
        log.debug('%s', jsonpp(trail_info))
        event_selectors = trail_info['EventSelectors']
        num_event_selectors = len(event_selectors)
        total_event_selectors += num_event_selectors
        if num_event_selectors < 1:
            log.warn('cloud trail %s has no event selectors', name)
            self.warning()
            trails_without_selectors += 1
        for event_selector in event_selectors:
            if event_selector['IncludeManagementEvents']:
                num_management += 1
            if event_selector['ReadWriteType'].lower() == 'all':  # All
                num_readwrite_all += 1
        if num_management < num_event_selectors or \
           num_readwrite_all < num_event_selectors:
            self.warning()
    if self.trail_name and not found:
        raise CriticalError('cloud trail \'{}\' not found'.format(self.trail_name))
    if total_event_selectors == 0:
        self.warning()
    return (total_event_selectors, num_management, num_readwrite_all, trails_without_selectors)
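# Abridged shape of the boto3 CloudTrail get_event_selectors() response this
# method walks (values illustrative):
#
#   {'TrailARN': 'arn:aws:cloudtrail:...',
#    'EventSelectors': [{'ReadWriteType': 'All',
#                        'IncludeManagementEvents': True,
#                        'DataResources': []}]}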
def send_blueprint_file(self, filename, name=''):
    # log.debug('send_blueprint_file(%s, %s)' % (filename, name))
    validate_file(filename, 'blueprint', nolog=True)
    try:
        _ = open(str(filename))
        file_data = _.read()
    except IOError as _:
        err = "failed to read Ambari Blueprint from file '%s': %s" % (filename, _)
        # log.critical(err)
        qquit('CRITICAL', err)
    if not name:
        try:
            name = self.parse_blueprint_name(file_data)
            log.info("name not specified, determined blueprint name from file contents as '%s'" % name)
        except KeyError as _:
            pass
    if not name:
        name = os.path.splitext(os.path.basename(filename))[0]
        log.info("name not specified and couldn't determine blueprint name from blueprint data, reverting to using filename without extension '%s'" % name)  # pylint: disable=line-too-long
    # this solves the issue of having duplicate Blueprint.blueprint_name keys
    data = file_data  # default to the raw file data in case the name can't be reset below
    try:
        json_data = json.loads(file_data)
        json_data['Blueprints']['blueprint_name'] = name
        data = json.dumps(json_data)
        log.info("reset blueprint field name to '%s'" % name)
    except ValueError as _:
        qquit('CRITICAL', "invalid json found in file '%s': %s" % (filename, _))
    except KeyError as _:
        log.warn('failed to reset the Blueprint name: %s' % _)
    return self.send_blueprint(name, data)
def extract_response_message(response_dict):
    try:
        return '{0}: {1}. '.format(response_dict['status']['responseCode'],
                                   response_dict['status']['responseMessage'])
    except KeyError:
        log.warn('failed to extract responseCode/responseMessage for additional error information. ' +
                 support_msg_api())
        return ''
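# Behaviour by example (dicts are illustrative):
#
#   extract_response_message({'status': {'responseCode': 200, 'responseMessage': 'OK'}})
#   # -> '200: OK. '
#   extract_response_message({'status': {}})
#   # -> '' (and logs a warning about the missing fields)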
def run(self):
    if not self.args:
        self.usage('no Dockerfile / directory args given')
    args = uniq_list_ordered(self.args)
    self.branch_prefix = self.get_opt('branch_prefix')
    if self.branch_prefix is not None:
        validate_regex(self.branch_prefix, 'branch prefix')
        self.branch_prefix = re.compile(self.branch_prefix)
    for arg in args:
        if not os.path.exists(arg):
            print("'%s' not found" % arg)
            sys.exit(ERRORS['WARNING'])
        if os.path.isfile(arg):
            log_option('file', arg)
        elif os.path.isdir(arg):
            log_option('directory', arg)
        else:
            die("path '%s' could not be determined as either a file or directory" % arg)
    for arg in args:
        self.check_git_branches_dockerfiles(arg)
    log.info('Total Branches: %s', len(self.branches))
    log.info('Selected Branches: %s', len(self.selected_branches))
    log.info('Branches checked: %s', self.branches_checked)
    log.info('Branches with Dockerfile checked: %s', len(self.branches_dockerfile_checked))
    branches_skipped = len(self.branches_skipped)
    if branches_skipped > 0:
        log.warn('{0} branches skipped for not matching expected naming format'
                 .format(branches_skipped))
    branches_not_checked = len(self.selected_branches) - len(self.branches_dockerfile_checked)
    if branches_not_checked > 1:
        log.warn('{0} branches not checked (no matching Dockerfile found?)'
                 .format(branches_not_checked))
        if log.isEnabledFor(logging.DEBUG):
            log.debug('Branches with no corresponding Dockerfile found:\n%s',
                      '\n'.join(set(self.selected_branches) - set(self.branches_dockerfile_checked)))
    log.info('{0} Dockerfiles checked'.format(len(self.dockerfiles_checked)))
    branches_failed = len(self.branches_failed)
    _ = '{0} Dockerfiles failed validation across {1} branches'.format(self.dockerfiles_failed, branches_failed)
    if branches_failed > 0:
        log.error(_)
    else:
        log.info(_)
    if self.failed:
        log.error('Dockerfile validation FAILED')
        sys.exit(ERRORS['CRITICAL'])
    log.info('Dockerfile validation SUCCEEDED')
def send_blueprint(self, name, data):
    # log.debug('save_blueprint(%s, %s)' % (name, data))
    blueprints = self.get_blueprints()
    if name in blueprints:
        log.warn("blueprint with name '%s' already exists" % name)
    log.info("sending blueprint '%s'" % name)
    if log.isEnabledFor(logging.DEBUG):
        log.debug("blueprint data = '%s'" % data)
    # not exposing this to user via switches - shouldn't be using this right now
    # return self.send('blueprints/%s?validate_topology=false' % name, data)
    return self.send('blueprints/%s' % name, data)
def query(self, url):
    log.debug('GET %s' % url)
    try:
        verify = True
        # workaround for Travis CI and older pythons - we're not exchanging secret data so this is ok
        #if os.getenv('TRAVIS'):
        #    verify = False
        if os.getenv('SSL_NOVERIFY') == '1':
            log.warn('disabling SSL verification')
            verify = False
        auth = None
        if self.user and self.password:
            auth = (self.user, self.password)
            log.debug('setting basic HTTP authentication using username: %s, password: <omitted>', self.user)
        req = requests.get(url, auth=auth, verify=verify)
    except requests.exceptions.RequestException as _:
        die(_)
    log.debug("response: %s %s", req.status_code, req.reason)
    log.debug("content:\n%s\n%s\n%s", '=' * 80, req.content.strip(), '=' * 80)
    if req.status_code != 200:
        die("%s %s" % (req.status_code, req.reason))
    if not isJson(req.content):
        die('invalid non-JSON response from Docker Registry!')
    if log.isEnabledFor(logging.DEBUG):
        print(jsonpp(req.content))
        print('=' * 80)
    tag_list = []
    try:
        json_data = json.loads(req.content)
        # DockerHub returns like this
        if 'results' in json_data:
            tag_list = [result['name'] for result in json_data['results']]
        # Docker Registry returns like this
        elif 'tags' in json_data:
            tag_list = json_data['tags']
        else:
            raise UnknownError('failed to parse response, found neither results nor tags fields. {0}'
                               .format(support_msg_api()))
        # could perhaps stack overflow in some scenario
        # not as functional programming 'cool' but will do own tail recursion and just while loop instead
        next_page_url = None
        if 'next' in json_data and json_data['next']:
            # tag_list += self.query(json_data['next'])
            next_page_url = json_data['next']
        return (tag_list, next_page_url)
    except KeyError as _:
        die('failed to parse output from Docker Registry (format may have changed?): {0}'.format(_))
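# Sketch of a pagination loop driving query() above, following the
# (tag_list, next_page_url) contract it returns (the url is a placeholder):
#
#   url = 'https://registry.example.com/v2/myimage/tags/list'
#   tags = []
#   while url:
#       (page_of_tags, url) = self.query(url)
#       tags += page_of_tags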
def is_file_dup_by_size(self, filepath):
    size = os.stat(filepath).st_size
    log.debug("file '%s' size '%s'", filepath, size)
    if size == 0:
        log.warn("skipping zero byte file '%s'", filepath)
        return 0
    if size in self.sizes:
        if self.compare_by_size:
            self.dups_by_size[size] = self.dups_by_size.get(size, set())
            # self.sizes[size] only ever holds the first file seen at this size,
            # since later files return here before being recorded below,
            # so unpacking its single key into add() is safe
            self.dups_by_size[size].add(*self.sizes[size])
            self.dups_by_size[size].add(filepath)
        return size
    self.sizes[size] = self.sizes.get(size, {})
    self.sizes[size][filepath] = None
    return False
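# Illustrative walk-through (paths hypothetical, compare_by_size enabled):
#
#   is_file_dup_by_size('/tmp/a')      # first 100-byte file  -> False (recorded in self.sizes)
#   is_file_dup_by_size('/tmp/b')      # second 100-byte file -> 100, dups_by_size[100] = {'/tmp/a', '/tmp/b'}
#   is_file_dup_by_size('/tmp/empty')  # zero bytes -> 0, skipped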
def get_latest_complete_report(reports):
    if not isList(reports):
        code_error('non-list passed to get_latest_complete_report()')
    if not reports:
        qquit('UNKNOWN', 'no reports passed to get_latest_complete_report()')
    num_reports = len(reports)
    index = 0
    report = reports[index]
    while report['status'] == 'INCOMPLETE':
        index += 1
        if index < num_reports:
            report = reports[index]
        else:
            log.warn('only incomplete workflows detected, will have to use latest incomplete workflow')
            report = reports[0]
            break  # every report is INCOMPLETE - stop here rather than looping forever
    return report
def check_git_branches_upstream(self, target):
    target = os.path.abspath(target)
    gitroot = find_git_root(target)
    if gitroot is None:
        die('Failed to find git root for target {0}'.format(target))
    log.debug("finding branches for target '{0}'".format(target))
    repo = git.Repo(gitroot)
    branches = repo.branches
    if self.branch_prefix is not None:
        log.debug('restricting to branches matching branch prefix')
        branches = [x for x in branches if self.branch_prefix.match(str(x))]
        if not branches:
            log.error("No branches matching '%s' for target '%s'", self.get_opt('branch_prefix'), target)
            self.status = 'NO BRANCHES'
    #if log.isEnabledFor(logging.DEBUG):
    #    log.debug('\n\nbranches for target %s:\n\n%s\n', target, '\n'.join(list(branches)))
    for branch in branches:
        expected = '{0}/{1}'.format(self.origin, branch)
        # have to str() this as it returns an object that will fail equality match otherwise
        tracking_branch = str(branch.tracking_branch())
        if tracking_branch == expected:
            log.info("OK: repo '{0}' branch '{1}' is tracking '{2}'"
                     .format(gitroot, branch, tracking_branch))
        elif self.get_opt('fix') and tracking_branch == 'None':
            log.warn("WARN: setting repo '{0}' unconfigured branch '{1}' to track '{2}'"
                     .format(gitroot, branch, expected))
            #print(list(repo.remotes.origin.refs))
            branch.set_tracking_branch(git.refs.remote.RemoteReference(repo, 'refs/remotes/' + expected))
        elif self.get_opt('force_fix'):
            log.warn("WARN: forcibly resetting repo '{0}' branch '{1}' to track '{2}'"
                     .format(gitroot, branch, expected))
            branch.set_tracking_branch(git.refs.remote.RemoteReference(repo, 'refs/remotes/' + expected))
        else:
            self.status = "ERROR"
            log.error("BAD: branch '{0}' is tracking '{1}' (expected '{2}')"
                      .format(branch, tracking_branch, expected))
def check_ping(host, count=None, wait=None):
    if count is None:
        count = 1
    if wait is None:
        wait = 3
    if not isInt(count):
        raise UnknownError("passed invalid count '{0}' to check_ping method, must be a valid integer!"
                           .format(count))
    if not isInt(wait):
        raise UnknownError("passed invalid wait '{0}' to check_ping method, must be a valid integer!"
                           .format(wait))
    log.info("pinging host '%s' (count=%s, wait=%s)", host, count, wait)
    count_switch = '-c'
    if platform.system().lower() == 'windows':
        count_switch = '-n'
    wait_switch = '-w'
    if platform.system().lower() == 'darwin':
        wait_switch = '-W'
    # causes hang if count / wait are not cast to string
    cmd = ['ping', count_switch, '{0}'.format(count), wait_switch, '{0}'.format(wait), host]
    log.debug('cmd: %s', ' '.join(cmd))
    #log.debug('args: %s', cmd)
    try:
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        #log.debug('communicating')
        (stdout, stderr) = process.communicate()
        #log.debug('waiting for child process')
        process.wait()
        exitcode = process.returncode
        log.debug('stdout: %s', stdout)
        log.debug('stderr: %s', stderr)
        log.debug('exitcode: %s', exitcode)
        if exitcode == 0:
            log.info("host '%s' responded to ping", host)
            return host
    except subprocess.CalledProcessError as _:
        log.warn('ping failed: %s', _.output)
    except OSError as _:
        die('error calling ping: {0}'.format(_))
    return None
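# Usage by example:
#
#   check_ping('127.0.0.1', count=2, wait=1)  # -> '127.0.0.1' if it responds, else None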
def __init__(self):
    super(SerfEventHandler, self).__init__()
    # allow shorter default 10 sec timeout
    # self.timeout_default = 30
    self.events = ['member-join', 'member-leave', 'member-failed',
                   'member-update', 'member-reap', 'user', 'query']
    self.node = os.getenv('SERF_SELF_NAME', '')
    self.role = os.getenv('SERF_SELF_ROLE', None)
    self.event = os.getenv('SERF_EVENT', None)
    self.query_name = os.getenv('SERF_QUERY_NAME', None)
    self.user_event = os.getenv('SERF_USER_EVENT', None)
    # self.user_ltime = os.getenv('SERF_USER_LTIME', 0)
    # self.query_ltime = os.getenv('SERF_QUERY_LTIME', 0)
    self.command = None
    # "expected to exit within a reasonable amount of time" according to docs, this seems like a reasonable
    # safeguard and is configurable on the command line via --timeout <secs>
    if self.event is None:
        log.warn('SERF_EVENT environment variable was None!!')
    elif self.event not in self.events:
        log.warn("SERF_EVENT environment variable passed unrecognized event type '%s'" % self.event)
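# Serf exports these variables when it invokes an event handler, e.g. a
# member-join event might run the handler roughly as (values illustrative):
#
#   SERF_EVENT=member-join SERF_SELF_NAME=node1 SERF_SELF_ROLE=web ./handler.py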
def check_ping(host, count=1, wait=1):
    log.info("pinging host '%s' (count=%s, wait=%s)", host, count, wait)
    # pass switch and value as separate argv elements - combined '-c 1' style
    # arguments are not parsed reliably across platforms
    ping_count = ['-c', '{0}'.format(count)]
    if platform.system().lower() == 'windows':
        ping_count = ['-n', '1']
    ping_wait = ['-w', '{0}'.format(wait)]
    if platform.system().lower() == 'darwin':
        ping_wait = ['-W', '1']
    try:
        exitcode = subprocess.call(['ping'] + ping_count + ping_wait + [host],
                                   stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if exitcode == 0:
            log.info("host '%s' responded to ping", host)
            return host
    except subprocess.CalledProcessError as _:
        log.warn('ping failed: %s', _.output)
    except OSError as _:
        die('error calling ping: {0}'.format(_))
    return None
def branch_version(self, branch):
    branch_base = None
    branch_versions = []
    # if ...-x.y-x.y
    match = self.branch_regex.match(branch)
    if match:
        groups = match.groups()
        #log.debug('groups = %s', groups)
        branch_base = groups[0]
        for version in groups[1:]:
            if version is None:
                continue
            branch_versions.append(version)
    else:
        log.warn("Failed to match branch format for branch '{0}'".format(branch) +
                 ", code needs extension for this branch naming format")
        self.branches_skipped.add(branch)
        return ('', [])
    log.debug('branch_base = %s', branch_base)
    log.debug('branch_versions = %s', branch_versions)
    return (branch_base, branch_versions)
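# Illustrative behaviour, assuming branch_regex captures a base name followed
# by optional version suffixes (the actual pattern is defined elsewhere in the
# class, so these examples are assumptions):
#
#   branch_version('centos-7.3')     # -> ('centos', ['7.3'])
#   branch_version('not_a_version')  # -> ('', []) and the branch is recorded as skipped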
def run(self):
    if not self.args:
        self.usage('no Dockerfile / directory args given')
    args = uniq_list_ordered(self.args)
    self.branch_prefix = self.get_opt('branch_prefix')
    if self.branch_prefix is not None:
        validate_regex(self.branch_prefix, 'branch prefix')
        self.branch_prefix = re.compile(self.branch_prefix)
    for arg in args:
        if not os.path.exists(arg):
            print("'%s' not found" % arg)
            sys.exit(ERRORS['WARNING'])
        if os.path.isfile(arg):
            log_option('file', arg)
        elif os.path.isdir(arg):
            log_option('directory', arg)
        else:
            die("path '%s' could not be determined as either a file or directory" % arg)
    for arg in args:
        self.check_git_branches_dockerfiles(arg)
    branches_skipped = len(self.branches_skipped)
    if branches_skipped > 0:
        log.warn('{0} branches skipped for not matching expected naming format'
                 .format(branches_skipped))
    log.info('{0} Dockerfiles checked across {1} branches'
             .format(len(self.dockerfiles_checked), self.branches_checked))
    branches_failed = len(self.branches_failed)
    _ = '{0} Dockerfiles failed validation across {1} branches'.format(self.dockerfiles_failed, branches_failed)
    if branches_failed > 0:
        log.error(_)
    else:
        log.info(_)
    if self.failed:
        log.error('Dockerfile validation FAILED')
        sys.exit(ERRORS['CRITICAL'])
    log.info('Dockerfile validation SUCCEEDED')
def process_file(self, filename, file_handle):
    for line in file_handle:
        # log.debug(line)
        match = self.re_line.match(line)
        if not match:
            err_msg = "ERROR in file '{0}' on line: {1}".format(filename, line)
            if not self.skip_errors:
                die(err_msg)
            printerr()
            log.warn(err_msg)
            continue
        metric = match.group(1)
        timestamp = match.group(2)
        # don't have a need for this right now
        # value = match.group(3)
        tags = match.group(4)
        key = metric
        if self.include_timestamps:
            timestamp = int(timestamp)
            # remove millis
            if len(str(timestamp)) >= 15:
                timestamp = round(timestamp / 1000)
            hour = time.strftime('%Y-%m-%d %H:00', time.gmtime(timestamp))
            key += ' ' + hour
        for tag in sorted(tags.split()):
            key += ' ' + tag.strip()
        if self.prefix_length is None:
            prefix = key
        else:
            prefix = key[0:min(self.prefix_length, len(key))]
        # prefix = self.bytes_to_str(prefix)
        if not self.keys.get(prefix):
            self.keys[prefix] = {'count': 0}
        self.keys[prefix]['count'] += 1
        self.total_keys += 1
        if self.verbose < 2 and self.total_keys % 10000 == 0:
            print('.', file=sys.stderr, end='')
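# Example of the line format this parses (OpenTSDB-style data points; the
# actual pattern is self.re_line, defined elsewhere, so this is an assumption):
#
#   sys.cpu.user 1479496100 42.5 host=web01 cpu=0
#
# which is bucketed under the key 'sys.cpu.user cpu=0 host=web01' (tags
# sorted), plus an hour column when include_timestamps is set.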
def run(self):
    csv_file = self.get_opt('csv')
    parquet_dir = self.get_opt('parquet_dir')
    has_header = self.get_opt('has_header')
    # I don't know why the Spark guys made this a string instead of a bool
    header_str = 'false'
    if has_header:
        header_str = 'true'
    schema = self.get_opt('schema')
    # let Spark fail if csv/parquet aren't available
    # can't check paths exist as want to remain generically portable
    # to HDFS, local filesystem or any other uri scheme Spark supports
    log.info("CSV Source: %s" % csv_file)
    log.info("Parquet Destination: %s" % parquet_dir)
    if schema:
        def get_type(arg):
            arg = str(arg).lower()
            if arg not in self.types_mapping:
                self.usage("invalid type '%s' defined in --schema, must be one of: %s"
                           % (arg, ', '.join(sorted(self.types_mapping.keys()))))
            # return self.types_mapping[arg]
            module = __import__('pyspark.sql.types', globals(), locals(), ['types'], -1)
            class_ = getattr(module, self.types_mapping[arg])
            _ = class_()
            return _

        def create_struct(arg):
            name = str(arg).strip()
            data_type = 'string'
            if ':' in arg:
                (name, data_type) = arg.split(':', 1)
            data_class = get_type(data_type)
            return StructField(name, data_class, True)

        # see https://github.com/databricks/spark-csv#python-api
        self.schema = StructType([create_struct(_) for _ in schema.split(',')])
        log.info('generated CSV => Spark schema')
    conf = SparkConf().setAppName('HS PySpark CSV => Parquet')
    sc = SparkContext(conf=conf)  # pylint: disable=invalid-name
    sqlContext = SQLContext(sc)  # pylint: disable=invalid-name
    spark_version = sc.version
    log.info('Spark version detected as %s' % spark_version)
    if not isVersionLax(spark_version):
        die("Spark version couldn't be determined. " + support_msg('pytools'))
    # pylint: disable=invalid-name
    df = None
    if isMinVersion(spark_version, 1.4):
        if has_header and not schema:
            log.info('inferring schema from CSV headers')
            df = sqlContext.read.format('com.databricks.spark.csv')\
                           .options(header=header_str, inferschema='true')\
                           .load(csv_file)
        else:
            log.info('using explicitly defined schema')
            df = sqlContext.read\
                           .format('com.databricks.spark.csv')\
                           .options(header=header_str)\
                           .load(csv_file, schema=self.schema)
        df.write.parquet(parquet_dir)
    else:
        log.warn('running legacy code for Spark <= 1.3')
        if has_header and not schema:
            log.info('inferring schema from CSV headers')
            df = sqlContext.load(source="com.databricks.spark.csv",
                                 path=csv_file, header=header_str, inferSchema='true')
        elif self.schema:
            log.info('using explicitly defined schema')
            schema = self.schema
            df = sqlContext.load(source="com.databricks.spark.csv",
                                 path=csv_file, header=header_str, schema=schema)
        else:
            die('no header and no schema, caught late')
        df.saveAsParquetFile(parquet_dir)
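# Example --schema argument accepted by create_struct()/get_type() above
# (field names illustrative; each type must be a key in self.types_mapping):
#
#   --schema 'name:string,age:int,balance:double,created:timestamp'
#
# Fields without an explicit ':type' default to string.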
def check_workflow(self, workflow_name, workflow_id, max_age=None, max_runtime=None):
    log.info("checking workflow '%s' id '%s'", workflow_name, workflow_id)
    # GET /workflow/fetchWorkflowStatus/<instance_id> is also available but only uses wfId, doesn't support wfName
    # returns ['result']['list'] = [ {}, {}, ... ]
    (req, self.query_time) = self.req(
        url='{url_base}/workflow/publish/getWorkflowExecutionHistory'.format(url_base=self.url_base),
        # orders by newest first, but seems to return last 10 anyway
        body=json.dumps({'chunk_size': 1,
                         'currentPage': 1,
                         'wfName': workflow_name,
                         'wfId': workflow_id}))
    info = ''
    if workflow_name:
        info += " name '{0}'".format(workflow_name)
    if workflow_id:
        info += " id '{0}'".format(workflow_id)
    try:
        json_dict = json.loads(req.content)
        result = json_dict['result']
        not_found_err = '{0}. {1}'.format(info, self.extract_response_message(json_dict)) + \
                        'Perhaps you specified the wrong name/id or the workflow hasn\'t run yet? ' + \
                        'Use --list to see existing workflows'
        if result is None:
            if self._all:
                return None
            qquit('CRITICAL', "no results found for workflow{0}".format(not_found_err))
        reports = result['jobExecutionReports']
        if not isList(reports):
            raise ValueError('jobExecutionReports is not a list')
        if not reports:
            qquit('CRITICAL', "no reports found for workflow{0}".format(not_found_err))
        # orders by newest first by default, checking last run only
        report = reports[0]
        num_reports = len(reports)
        index = 0
        while report['status'] == 'INCOMPLETE':
            index += 1
            if index >= num_reports:
                log.warn('only incomplete workflows detected')
                report = reports[0]
                break  # every report is INCOMPLETE - stop here rather than looping forever
            report = reports[index]
        status = report['status']
        if status == 'SUCCESS':
            pass
        elif status == 'INCOMPLETE':
            self.warning()
        else:
            self.critical()
        self.msg += "workflow '{workflow}' id '{id}' status = '{status}'".format(
            workflow=report['wfName'], id=report['wfId'], status=status)
        if not self._all:
            self.check_times(report['startDate'], report['endDate'], max_age, max_runtime)
        return status
    except (KeyError, ValueError) as _:
        qquit('UNKNOWN', 'error parsing workflow execution history: {0}'.format(_))
def run(self):
    csv_file = self.options.csv
    parquet_dir = self.options.parquet_dir
    has_header = self.options.has_header
    # I don't know why the Spark guys made this a string instead of a bool
    header_str = 'false'
    if has_header:
        header_str = 'true'
    schema = self.options.schema
    # let Spark fail if csv/parquet aren't available
    # can't check paths exist as want to remain generically portable
    # to HDFS, local filesystem or any other uri scheme Spark supports
    log.info("CSV Source: %s" % csv_file)
    log.info("Parquet Destination: %s" % parquet_dir)
    if schema:
        def get_type(arg):
            arg = str(arg).lower()
            if arg not in self.types_mapping:
                self.usage("invalid type '%s' defined in --schema, must be one of: %s"
                           % (arg, ', '.join(sorted(self.types_mapping.keys()))))
            # return self.types_mapping[arg]
            module = __import__('pyspark.sql.types', globals(), locals(), ['types'], -1)
            class_ = getattr(module, self.types_mapping[arg])
            _ = class_()
            return _

        def create_struct(arg):
            name = arg
            data_type = 'string'
            if ':' in arg:
                (name, data_type) = arg.split(':', 1)
            data_class = get_type(data_type)
            return StructField(name, data_class, True)

        # see https://github.com/databricks/spark-csv#python-api
        self.schema = StructType([create_struct(_) for _ in schema.split(',')])
        log.info('generated CSV => Spark schema')
    conf = SparkConf().setAppName('HS PySpark CSV => Parquet')
    sc = SparkContext(conf=conf)  # pylint: disable=invalid-name
    sqlContext = SQLContext(sc)  # pylint: disable=invalid-name
    spark_version = sc.version
    log.info('Spark version detected as %s' % spark_version)
    if not isVersionLax(spark_version):
        die("Spark version couldn't be determined. " + support_msg('pytools'))
    # pylint: disable=invalid-name
    df = None
    if isMinVersion(spark_version, 1.4):
        if has_header and not schema:
            log.info('inferring schema from CSV headers')
            df = sqlContext.read.format('com.databricks.spark.csv')\
                           .options(header=header_str, inferschema='true')\
                           .load(csv_file)
        else:
            log.info('using explicitly defined schema')
            df = sqlContext.read\
                           .format('com.databricks.spark.csv')\
                           .options(header=header_str)\
                           .load(csv_file, schema=self.schema)
        df.write.parquet(parquet_dir)
    else:
        log.warn('running legacy code for Spark <= 1.3')
        if has_header and not schema:
            log.info('inferring schema from CSV headers')
            df = sqlContext.load(source="com.databricks.spark.csv",
                                 path=csv_file,
                                 header=header_str,
                                 inferSchema='true')
        elif self.schema:
            log.info('using explicitly defined schema')
            schema = self.schema
            df = sqlContext.load(source="com.databricks.spark.csv",
                                 path=csv_file,
                                 header=header_str,
                                 schema=schema)
        else:
            die('no header and no schema, caught late')
        df.saveAsParquetFile(parquet_dir)