def __init__(self):
    # param => Parameter(pname, datatype, belongs)
    self.parameters = util.parameters
    self.rtype_file = cfg.hadoop_realworld_type
    self.read_rtype(self.rtype_file)
    # create a folder to keep Coq configurations
    self.coq_conf_folder = cfg.gen_confs + os.sep + 'coq_confs'
    util.make_folder_ready(self.coq_conf_folder)
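# A minimal sketch of the util.make_folder_ready helper used above. Its
# definition is not shown in this section; this version assumes "ready" means
# the folder exists and is empty, matching how fresh per-iteration
# configuration folders are used below.
def make_folder_ready(folder):
    """Create `folder` if missing; otherwise clear its contents."""
    import os
    import shutil
    if os.path.isdir(folder):
        shutil.rmtree(folder)  # drop stale files from earlier runs
    os.makedirs(folder)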
def profile(self, itertime, in_conf):
    '''
    Profile the Hadoop system with the given configuration
    :param itertime: index of the current profiling iteration
    :param in_conf: a new configuration
    :return: performance (average CPU time; sys.maxsize on failure)
    '''
    conf = in_conf.copy()
    self.itertime = itertime

    # prepare the system with the new configuration:
    # generate the configuration files for this iteration
    self.curr_genconf_folder = cfg.gen_confs + os.sep + 'conf' + str(
        self.itertime)
    util.make_folder_ready(self.curr_genconf_folder)

    # merge the original configurations with the new ones;
    # the original (default) settings take precedence
    for p, v in self.original_confs.iteritems():
        conf[p] = v
    confs = util.write_into_conf_file(conf, self.curr_genconf_folder)

    # No need to restart Hadoop: it is enough to copy the new configuration
    # files to the configuration folder on the master node, since HiBench
    # reads them when it submits a new job.
    self.copy_new_conf(confs)

    # profile the system: run the HiBench benchmark three times
    cpu_times = []
    for i in range(3):
        success, elapsed_time = self.call_benchmark()
        if success:
            cpu_time = self.get_cpu_time_from_output()
            cpu_times.append(cpu_time)
        else:
            # one failed run already marks this configuration as bad,
            # so fail fast instead of testing further
            self.hibench_output = ''  # clear output of the failed run
            cpu_times = []
            break

    cpu_times = [t for t in cpu_times if t < sys.maxsize]
    average_cpu_time = sys.maxsize  # maximum time, i.e. worst performance
    if len(cpu_times) > 0:
        average_cpu_time = np.mean(cpu_times)
    return int(average_cpu_time)
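# A hedged sketch of util.write_into_conf_file as called above. The actual
# helper is defined elsewhere; this version assumes Hadoop-style *-site.xml
# output and, for brevity, routes every parameter into a single core-site.xml,
# whereas the real code likely splits parameters across the standard files.
def write_into_conf_file(conf, folder):
    """Write `conf` (name -> value) as a Hadoop XML config file inside
    `folder`, returning the list of files written."""
    import os
    path = os.path.join(folder, 'core-site.xml')
    with open(path, 'w') as f:
        f.write('<?xml version="1.0"?>\n<configuration>\n')
        for name, value in conf.iteritems():
            f.write('  <property>\n')
            f.write('    <name>%s</name>\n' % name)
            f.write('    <value>%s</value>\n' % value)
            f.write('  </property>\n')
        f.write('</configuration>\n')
    return [path]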
def __init__(self, config):
    self.config = config
    self.itertime = 0
    util.make_folder_ready(self.config.gen_confs)
    # back up some critical settings from the original Spark configuration
    self.conf_file = os.sep.join(
        [self.config.sys_home, 'conf', 'spark-defaults.conf'])
    self.original_confs = self.parse_orig_confs(self.conf_file)
    self.avg_run_time = 2000  # seconds
    self.hibench_output = ''
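# A plausible sketch of the parse_orig_confs method used above, based on the
# documented spark-defaults.conf format (one "key value" pair per line,
# separated by whitespace, with '#' starting a comment). The real parser is
# defined elsewhere in the class.
def parse_orig_confs(self, conf_file):
    """Return a dict of the settings found in spark-defaults.conf."""
    confs = {}
    with open(conf_file) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                continue  # skip blank lines and comments
            parts = line.split(None, 1)  # key and value split on whitespace
            if len(parts) == 2:
                confs[parts[0]] = parts[1].strip()
    return confs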
def profile_with_type_checker(self, profile_num, conf):
    # type-check the configuration before profiling it
    perf = sys.maxsize
    if not self.type_checker.check(profile_num, conf):
        print 'type-checking failed, config', str(profile_num)
        self.invalid_confs.append(profile_num)
        # still write the rejected configuration out for later inspection
        genconf_folder = self.curr_genconf_folder + str(profile_num)
        util.make_folder_ready(genconf_folder)
        tmp_conf = conf.copy()
        for p, v in self.profiler.original_confs.iteritems():
            tmp_conf[p] = v
        util.write_into_conf_file(tmp_conf, genconf_folder)
    else:
        perf = self.profiler.profile(profile_num, conf)
    return perf
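# A simplified illustration of the type_checker.check interface used above.
# The real checker works over the generated Coq configurations (see the
# coq_confs folder created earlier); this stub only shows the shape of the
# call and assumes each Parameter carries a `datatype` field, per the
# Parameter(pname, datatype, belongs) comment above.
class NaiveTypeChecker(object):
    def __init__(self, parameters):
        self.parameters = parameters  # pname -> Parameter(pname, datatype, belongs)

    def check(self, profile_num, conf):
        """Return True iff every value parses under its declared datatype."""
        casts = {'int': int, 'float': float}
        for pname, value in conf.iteritems():
            param = self.parameters.get(pname)
            if param is None:
                continue  # parameters without a declared type are not checked
            try:
                casts.get(param.datatype, str)(value)
            except (TypeError, ValueError):
                return False
        return True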
def profile_conf(self, conf):
    perf = sys.maxsize
    # type-check the configuration before profiling it
    if not self.type_checker.check(self.profile_num, conf):
        print 'type-checking failed, config', str(self.profile_num)
        self.invalid_confs.append(self.profile_num)
        # still write the rejected configuration out for later inspection
        genconf_folder = self.curr_genconf_folder + str(self.profile_num)
        util.make_folder_ready(genconf_folder)
        tmp_conf = conf.copy()
        for p, v in self.profiler.original_confs.iteritems():
            tmp_conf[p] = v
        util.write_into_conf_file(tmp_conf, genconf_folder)
        self.profile_num += 1
        return perf
    perf = self.profiler.profile(self.profile_num, conf)
    print time.strftime(
        "%Y-%m-%d %H:%M:%S"
    ), self.profile_num, 'benchmark done! Performance:', perf
    self.profile_num += 1
    return perf
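# Hypothetical usage of profile_conf inside a search loop. The method name
# `tune` and the `sampled_confs` input are illustrative, not part of this
# code base; smaller CPU time is better, and sys.maxsize marks failures.
def tune(self, sampled_confs):
    best_conf, best_perf = None, sys.maxsize
    for conf in sampled_confs:
        perf = self.profile_conf(conf)
        if perf < best_perf:
            best_conf, best_perf = conf, perf
    return best_conf, best_perf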
def start(self):
    self.backup_folder = 'backup'
    util.make_folder_ready(self.backup_folder)
    # read and back up the original Hadoop configurations
    self.backup_original_confs()
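# A hedged sketch of backup_original_confs, whose definition is not shown in
# this section. It assumes a `cfg.hadoop_home` setting (hypothetical; only
# cfg.gen_confs and cfg.hadoop_realworld_type appear above) and the standard
# etc/hadoop layout, and simply copies every configuration file into the
# backup folder created in start().
def backup_original_confs(self):
    import os
    import shutil
    conf_dir = os.path.join(cfg.hadoop_home, 'etc', 'hadoop')  # assumed layout
    for fname in os.listdir(conf_dir):
        src = os.path.join(conf_dir, fname)
        if os.path.isfile(src):
            shutil.copy(src, self.backup_folder)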