def run_mapred_job(self, key_prefix='', raise_on_error=False):
    rec = self.test_config

    # Build environment for command.
    env = None
    hadoop_command_env = rec.get('%shadoop_command_env' % key_prefix)
    if hadoop_command_env:
        env = dict(os.environ)
        env.update(hadoop_command_env)

    t0 = datetime.datetime.utcnow()
    return_code, output, errors = system_command(
        rec['%shadoop_command' % key_prefix],
        print_command=True,
        print_output=True,
        timeout=rec.get('%scommand_timeout_sec' % key_prefix),
        raise_on_error=False,
        shell=False,
        noop=rec.get('%snoop' % key_prefix, False),
        env=env)
    t1 = datetime.datetime.utcnow()
    td = t1 - t0

    rec['%sutc_begin' % key_prefix] = t0.isoformat()
    rec['%sutc_end' % key_prefix] = t1.isoformat()
    rec['%selapsed_sec' % key_prefix] = time_duration_to_seconds(td)
    rec['%serror' % key_prefix] = (return_code != 0)
    rec['%scommand_timed_out' % key_prefix] = (return_code == -1)
    rec['%sexit_code' % key_prefix] = return_code
    rec['%soutput' % key_prefix] = output
    rec['%serrors' % key_prefix] = errors

    # Parse HDFS byte counters and the YARN job ID from the job client's stderr output.
    rec['%sbytes_read_hdfs' % key_prefix] = float(regex_first_group(
        'Bytes Read=(.*)', errors, return_on_no_match='nan', search=True))
    rec['%sbytes_written_hdfs' % key_prefix] = float(regex_first_group(
        'Bytes Written=(.*)', errors, return_on_no_match='nan', search=True))
    rec['%shadoop_job_id' % key_prefix] = regex_first_group(
        'Running job: (job_[0-9_]+)', errors, search=True)

    # Only raise when the caller asked for it; the error is always recorded in rec.
    if raise_on_error and rec['%serror' % key_prefix]:
        raise Exception('Hadoop job failed')
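# For reference, the 'Bytes Read=', 'Bytes Written=', and 'Running job:' regexes in
# run_mapred_job() above match lines that the Hadoop job client prints to stderr.
# A hypothetical excerpt (job ID and counter values are illustrative only):
#
#   ... INFO mapreduce.Job: Running job: job_1449965544987_0003
#   ...
#   File Input Format Counters
#           Bytes Read=1073741824
#   File Output Format Counters
#           Bytes Written=1073741824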
def get_isilon_version_tuple(self, isi_version_output):
    def try_int(x):
        try:
            return int(x)
        except ValueError:
            return x
    # Extract the dotted version string from the 'isi version' output and convert
    # each numeric component to int so that tuples compare correctly.
    s = regex_first_group('.*Isilon OneFS v(.*?) ', isi_version_output)
    return tuple(try_int(d) for d in s.split('.'))
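# Illustrative sketch only: a hypothetical 'isi version' output line and how the
# resulting tuple can be compared numerically (the version string below is an
# assumed example, not taken from this codebase):
#
#   version = self.get_isilon_version_tuple('Isilon OneFS v8.0.0.4 B_8_0_0_011(RELEASE) ...')
#   # version == (8, 0, 0, 4)
#   if version >= (8, 0):
#       pass  # e.g. enable behavior that requires OneFS 8.0 or later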
def run_test(self):
    config = self.test_config
    self.hadoop_authenticate()
    self.configure_environment()

    base_directory = config['base_directory'] % config
    data_size_MB = config['data_size_MB']
    config['requested_data_size_MB'] = data_size_MB
    data_directory = '%s/TestDFSIO' % base_directory

    if 'jar' not in config:
        config['jar'] = config['job_client_jar']

    # Build the generic Hadoop options and the TestDFSIO-specific options.
    generic_options = self.get_hadoop_parameters()
    generic_options.append('-Dtest.build.data=%s' % data_directory)
    options = []
    options.extend(['-nrFiles', '%s' % config['map_tasks']])
    options.extend(['-bufferSize', '%s' % config['buffer_size']])
    config['job_name'] = 'TestDFSIO,%(test)s,%(map_tasks)d,%(data_size_MB)dMB' % config
    # Each map task reads or writes one file; divide the total data size evenly across tasks.
    file_size_MB = int(data_size_MB / config['map_tasks'])
    options.extend(['-size', '%dMB' % file_size_MB])
    options.append('-%s' % config['test'])
    generic_options.append('-Dmapreduce.job.name=%s' % config['job_name'])

    # TestDFSIO doesn't use the correct file system. We may need to set fs.defaultFS.
    # Note that this will break the subsequent fetch of job info.
    # viprfs://hwxecs1bucket1.ns1.Site1/benchmarks/TestDFSIO
    # default_fs = regex_first_group('(.*://.*/).*', base_directory)
    # if default_fs:
    #     generic_options.append('-Dfs.defaultFS=%s' % default_fs)
    # original_default_fs = 'hdfs://hwxecs2-master-0.solarch.local:8020'
    # generic_options.append('-Dmapreduce.jobhistory.done-dir=%s/mr-history/done' % original_default_fs)
    # generic_options.append('-Dmapreduce.jobhistory.intermediate-done-dir=%s/mr-history/tmp' % original_default_fs)
    # generic_options.append('-Dyarn.app.mapreduce.am.staging-dir=%s/user' % original_default_fs)

    cmd = []
    cmd.extend(['hadoop', 'jar', config['jar'], 'TestDFSIO'])
    cmd.extend(generic_options)
    cmd.extend(options)
    config['hadoop_command'] = cmd

    self.run_mapred_job()

    if not config['error']:
        # TestDFSIO reports the actual amount of data processed on stderr; use it to
        # compute the aggregate I/O rate.
        config['data_size_MB'] = float(regex_first_group(
            'Total MBytes processed: (.*)', config['errors'],
            return_on_no_match='nan', search=True))
        config['total_io_rate_MB_per_sec'] = config['data_size_MB'] / config['elapsed_sec']

    self.record_result()

    if config['error']:
        raise Exception('Hadoop job failed')
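# For context, a hypothetical command line built by run_test() above. The jar path,
# task count, buffer size, and data size are illustrative assumptions, not defaults
# of this framework:
#
#   hadoop jar /path/to/hadoop-mapreduce-client-jobclient-tests.jar TestDFSIO \
#       -Dtest.build.data=/benchmarks/TestDFSIO \
#       -Dmapreduce.job.name=TestDFSIO,write,8,1024MB \
#       -nrFiles 8 -bufferSize 131072 -size 128MB -write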
def run_mapred_job(self):
    config = self.test_config
    with self.metrics_collector_context():
        self.start_metrics()

        # Build environment for command.
        env = None
        hadoop_command_env = config.get('hadoop_command_env')
        if hadoop_command_env:
            env = dict(os.environ)
            env.update(hadoop_command_env)

        logging.info('*****************************************************************')
        logging.info(config['test_desc'])

        t0 = datetime.datetime.utcnow()
        exit_code, output, errors = system_command(
            config['hadoop_command'],
            print_command=True,
            print_output=True,
            raise_on_error=False,
            shell=False,
            noop=config['noop'],
            env=env,
            timeout=config.get('command_timeout_sec', None))
        t1 = datetime.datetime.utcnow()
        td = t1 - t0

        config['utc_begin'] = t0.isoformat()
        config['utc_end'] = t1.isoformat()
        config['elapsed_sec'] = time_duration_to_seconds(td)
        config['error'] = (exit_code != 0)
        config['command_timed_out'] = (exit_code == -1)
        config['exit_code'] = exit_code
        config['output'] = output
        config['errors'] = errors

        # Parse HDFS byte counters and the YARN job ID from the job client's stderr output.
        config['bytes_read_hdfs'] = float(regex_first_group(
            'Bytes Read=(.*)', errors, return_on_no_match='nan', search=True))
        config['bytes_written_hdfs'] = float(regex_first_group(
            'Bytes Written=(.*)', errors, return_on_no_match='nan', search=True))
        config['hadoop_job_id'] = regex_first_group(
            'Running job: (job_[0-9_]+)', errors, search=True)

        self.get_completed_job_info()
def run_query(query_config):
    rec = query_config
    print_output = rec.get('print_output', True)
    stream_id = rec.get('stream_id', 0)
    rec['db_name'] = rec['db_name'] % rec

    if rec.get('kill_all_yarn_jobs_before_each_query', False):
        kill_all_yarn_jobs()

    # Record the query text in the result record.
    rec['query_filename_contents'] = read_file_to_string(rec['query_filename'])

    shell = False
    db_type = rec['db_type']

    # Build query command.
    if db_type == 'hawq':
        cmd = []
        cmd.extend(['psql'])
        cmd.extend(['-v', 'ON_ERROR_STOP=1'])
        cmd.extend(['-d', rec['db_name']])
        cmd.extend(['-tAf', rec['query_filename']])
    elif db_type == 'hive':
        # Derive hive.tez.java.opts from the Tez container size unless it was set explicitly.
        if ('hiveconf:hive.tez.java.opts' not in rec and 'java_opts_xmx_ratio' in rec
                and 'hiveconf:hive.tez.container.size' in rec):
            rec['hiveconf:hive.tez.java.opts'] = '-Xmx%dm' % (
                rec['hiveconf:hive.tez.container.size'] * rec['java_opts_xmx_ratio'])
        # Pass any 'hiveconf:*' keys in the record through as --hiveconf options.
        hiveconf = []
        for k, v in rec.items():
            prop = regex_first_group('^hiveconf:(.*)', k)
            if prop:
                hiveconf.extend(['--hiveconf', '"%s=%s"' % (prop, v)])
        cmd = []
        cmd.extend(['hive'])
        cmd.extend(['--database', rec['db_name']])
        cmd.extend(['-f', rec['query_filename']])
        if 'hive_init_file' in rec:
            cmd.extend(['-i', rec['hive_init_file']])
            # Record contents of file in result.
            rec['hive_init_file_contents'] = read_file_to_string(rec['hive_init_file'])
        cmd.extend(hiveconf)
    elif db_type == 'impala':
        cmd = []
        cmd.extend(['impala-shell'])
        cmd.extend(['--impalad', '%s:%d' % (rec.get('impalad_host', 'localhost'),
                                            rec.get('impalad_port', 21000))])
        cmd.extend(['--database', rec['db_name']])
        cmd.extend(['-f', rec['query_filename']])
        cmd.extend(['-B'])  # turn off pretty printing
        cmd.extend(['-o', '/dev/null'])
        if rec.get('profile_query'):
            cmd.extend(['--show_profiles'])
    else:
        raise ValueError('Unknown db_type: %s' % db_type)

    logging.info('%d: # %s' % (stream_id, ' '.join(cmd)))
    rec['query_command'] = cmd

    t0 = datetime.datetime.utcnow()
    # Run query.
    return_code, output, errors = system_command(
        cmd,
        print_command=False,
        print_output=print_output,
        timeout=rec.get('command_timeout_sec', None),
        raise_on_error=False,
        shell=shell)
    t1 = datetime.datetime.utcnow()
    td = t1 - t0

    rec['utc_begin'] = t0.isoformat()
    rec['utc_end'] = t1.isoformat()
    rec['elapsed_sec'] = time_duration_to_seconds(td)
    rec['error'] = (return_code != 0)
    rec['exit_code'] = return_code
    rec['command_timed_out'] = (return_code == -1)
    rec['output'] = output
    rec['errors'] = errors
    rec['record_type'] = 'query_result'

    # Parse query output to determine elapsed time and rows returned.
    if db_type == 'hive':
        rec['application_id'] = regex_first_group(
            '\\(Executing on YARN cluster with App id (application_.*)\\)$',
            errors, return_on_no_match=None, search=True, flags=re.MULTILINE)
        # Extract the actual query duration from stderr text. Note that we must find
        # the last occurrence of 'Time taken'.
        query_elapsed_sec = regex_first_group(
            'Time taken: ([0-9.]+) seconds', errors, return_on_no_match='nan',
            search=True, flags=re.MULTILINE, match_last=True)
        if query_elapsed_sec == 'nan':
            logging.warning('Time taken not returned by command.')
            rec['error'] = True
        rec['query_elapsed_sec'] = float(query_elapsed_sec)
        rec['non_query_elapsed_sec'] = rec['elapsed_sec'] - rec['query_elapsed_sec']
        # Extract row count from stderr text. Note that some queries will not report fetched rows.
        query_rows_returned = regex_first_group(
            'Fetched: ([0-9]+) row', errors, return_on_no_match='0',
            search=True, flags=re.MULTILINE)
        rec['query_rows_returned'] = int(query_rows_returned)
        logging.info(
            'error=%d, query_elapsed_sec=%f, non_query_elapsed_sec=%f, query_rows_returned=%d' % (
                rec['error'], rec['query_elapsed_sec'], rec['non_query_elapsed_sec'],
                rec['query_rows_returned']))
    elif db_type == 'impala':
        # Extract the actual query duration from stderr text, e.g.:
        #   Fetched 100 row(s) in 0.98s
        query_elapsed_sec = regex_first_group(
            'Fetched [0-9]+ row\\(s\\) in ([0-9.]+)s', errors, return_on_no_match='nan',
            search=True, flags=re.MULTILINE, match_last=True)
        if query_elapsed_sec == 'nan':
            logging.warning('Time taken not returned by command.')
            rec['error'] = True
        rec['query_elapsed_sec'] = float(query_elapsed_sec)
        rec['non_query_elapsed_sec'] = rec['elapsed_sec'] - rec['query_elapsed_sec']
        # Extract row count from stderr text. Note that some queries will not report fetched rows.
        query_rows_returned = regex_first_group(
            'Fetched ([0-9]+) row\\(s\\)', errors, return_on_no_match='0',
            search=True, flags=re.MULTILINE)
        rec['query_rows_returned'] = int(query_rows_returned)
        logging.info(
            'error=%d, query_elapsed_sec=%f, non_query_elapsed_sec=%f, query_rows_returned=%d' % (
                rec['error'], rec['query_elapsed_sec'], rec['non_query_elapsed_sec'],
                rec['query_rows_returned']))
    else:
        rec['query_elapsed_sec'] = rec['elapsed_sec']
        rec['non_query_elapsed_sec'] = 0.0
        rec['query_rows_returned'] = np.nan

    # Handle errors.
    if rec['error']:
        logging.info('%d: return_code=%d' % (stream_id, return_code))
        if not print_output:
            logging.info('%d: %s' % (stream_id, output))
        if db_type == 'hive':
            # Kill the YARN application so a failed query does not keep cluster resources.
            if rec['application_id']:
                kill_yarn_job(rec['application_id'])
        if errors != '':
            if not print_output:
                logging.info('%d: %s' % (stream_id, errors))

    if not rec['error']:
        logging.info('%d: %s: %0.3f seconds' % (
            stream_id, rec['query_filename'], rec['elapsed_sec']))

    return rec
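# Illustrative sketch only: a minimal query_config for a Hive query. The keys shown
# are the ones run_query() reads above; the specific values (database name pattern,
# scale factor, query file path) are assumptions for the example, not defaults of
# this framework:
#
#   result = run_query({
#       'db_type': 'hive',
#       'db_name': 'tpcds_%(scale_factor)d',   # expanded with '%' against the record itself
#       'scale_factor': 1000,
#       'query_filename': 'queries/query21.sql',
#       'stream_id': 0,
#       'print_output': False,
#   })
#   print(result['query_elapsed_sec'], result['query_rows_returned'])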