Esempio n. 1
0
 def __init__(self):
     '''
     Load the parameter table and the real-world type file, then make sure
     the folder that receives generated Coq configurations is ready.
     '''
     # param => Parameter(pname, datatype, belongs)
     self.parameters = util.parameters
     self.rtype_file = cfg.hadoop_realworld_type
     self.read_rtype(self.rtype_file)
     # Coq configurations are kept in '<cfg.gen_confs>/coq_confs'
     coq_dir = os.sep.join([cfg.gen_confs, 'coq_confs'])
     self.coq_conf_folder = coq_dir
     util.make_folder_ready(coq_dir)
Esempio n. 2
0
    def profile(self, itertime, in_conf):
        '''
        Benchmark the system under a candidate configuration.

        The candidate is copied, overlaid with the original settings (the
        original values win on conflicts), written into a per-iteration
        folder and handed to copy_new_conf(). The benchmark is then run up
        to three times; a single failed run abandons the configuration.

        :param itertime: iteration index; stored on self and used as the
                         suffix of the generated configuration folder
        :param in_conf: candidate configuration; copied, never modified
        :return: mean CPU time of the runs as an int, or sys.maxsize when
                 a run failed or no usable CPU time was collected
        '''
        self.itertime = itertime
        merged = in_conf.copy()

        # generate the configuration files for this iteration
        self.curr_genconf_folder = os.sep.join(
            [cfg.gen_confs, 'conf' + str(self.itertime)])
        util.make_folder_ready(self.curr_genconf_folder)
        for name, value in self.original_confs.iteritems():
            merged[name] = value
        written = util.write_into_conf_file(merged, self.curr_genconf_folder)

        # No need to restart Hadoop. Only the new configuration files have
        # to be copied to the configuration folder on the Master node;
        # HiBench will use those files when it submits a new job.
        self.copy_new_conf(written)

        # execute HiBench and collect the CPU time of every run
        samples = []
        for _ in range(3):
            succeeded, _elapsed = self.call_benchmark()
            if not succeeded:
                # one failed run marks the configuration as bad: drop the
                # output and the collected samples, and fail fast
                self.hibench_output = ''
                samples = []
                break
            samples.append(self.get_cpu_time_from_output())

        # sys.maxsize marks an unusable measurement; leave those out
        usable = [s for s in samples if s < sys.maxsize]
        if len(usable) == 0:
            return int(sys.maxsize)
        return int(np.mean(usable))
Esempio n. 3
0
 def __init__(self, config):
     '''
     Set up the profiler state from the given configuration object.

     :param config: settings object; its gen_confs and sys_home
                    attributes are read here
     '''
     self.config = config
     self.itertime = 0
     util.make_folder_ready(self.config.gen_confs)
     # remember the original settings from <sys_home>/conf/spark-defaults.conf
     self.conf_file = (self.config.sys_home + os.sep + 'conf' + os.sep +
                       'spark-defaults.conf')
     self.original_confs = self.parse_orig_confs(self.conf_file)
     self.avg_run_time = 2000  # seconds
     self.hibench_output = ''
Esempio n. 4
0
 def profile_with_type_checker(self, profile_num, conf):
     '''
     Profile a configuration only when it passes the type checker.

     A rejected configuration is recorded in self.invalid_confs and is
     still written to its own folder (overlaid with the profiler's
     original settings), but it is never benchmarked.

     :param profile_num: identifier of this run; passed to the type
                         checker and the profiler, and used as the suffix
                         of the folder for a rejected configuration
     :param conf: candidate configuration (needs a copy() method)
     :return: the value returned by self.profiler.profile(), or
              sys.maxsize when type-checking fails
     '''
     if self.type_checker.check(profile_num, conf):
         return self.profiler.profile(profile_num, conf)

     # single-argument form prints the same text under Python 2 and 3
     print('type-checking failed, config ' + str(profile_num))
     self.invalid_confs.append(profile_num)
     genconf_folder = self.curr_genconf_folder + str(profile_num)
     util.make_folder_ready(genconf_folder)
     # Copy the `conf` parameter: the previous `cnf.copy()` referred to a
     # name that is not defined in this method and raised NameError for
     # every rejected configuration.
     tmp_conf = conf.copy()
     # original settings take precedence over the candidate's values
     tmp_conf.update(self.profiler.original_confs)
     util.write_into_conf_file(tmp_conf, genconf_folder)
     return sys.maxsize
Esempio n. 5
0
 def profile_conf(self, conf):
     # print 'Enter profile_confs'
     perf = sys.maxsize
     # type checker for cnf
     if not self.type_checker.check(self.profile_num, conf):
         print 'type-checking failed, config', str(self.profile_num)
         self.invalid_confs.append(self.profile_num)
         genconf_folder = self.curr_genconf_folder + str(self.profile_num)
         util.make_folder_ready(genconf_folder)
         tmp_conf = conf.copy()
         for p, v in self.profiler.original_confs.iteritems():
             tmp_conf[p] = v
         util.write_into_conf_file(tmp_conf, genconf_folder)
         self.profile_num += 1
         return perf
     perf = self.profiler.profile(self.profile_num, conf)
     print time.strftime(
         "%Y-%d-%m %H:%M:%S"
     ), self.profile_num, 'benchmark done! Performance: ', perf
     self.profile_num += 1
     return perf
Esempio n. 6
0
 def start(self):
     '''
     Make the 'backup' folder ready and back up the original
     configurations via backup_original_confs().
     '''
     backup_dir = 'backup'
     self.backup_folder = backup_dir
     util.make_folder_ready(backup_dir)
     # read original hadoop configurations
     self.backup_original_confs()