Beispiel #1
0
    def run_cmd_sudo(self,
                     cmd,
                     host,
                     machine_config,
                     fail_ok=False,
                     return_dict=None,
                     proc_counter=None):
        """
        Runs a command through sudo on a host and records whether it worked.
        Arguments:
            * cmd - Command line to run.
            * host - Host passed to self.new_connection to open the connection.
            * machine_config - Machine level config, also passed to
            self.new_connection.
            * fail_ok - When True, a failing command is still recorded as a
            success and no error is logged.
            * return_dict - Optional mapping that receives the status.
            * proc_counter - Key under which the status is stored in
            return_dict.
        The status is only stored when both return_dict and proc_counter are
        given.  Always returns None.
        """
        record_status = return_dict is not None and proc_counter is not None
        cxn = self.new_connection(host, machine_config)
        try:
            cxn.sudo(cmd, hide=False)
        except Exception:
            # Catch Exception rather than everything, so that
            # KeyboardInterrupt / SystemExit still propagate instead of being
            # recorded as a (possibly successful) command result.
            if not fail_ok:
                utils.error("Failed to run cmd {} on host {}.".format(
                    cmd, host))
            if record_status:
                return_dict[proc_counter] = bool(fail_ok)
            return
        if record_status:
            return_dict[proc_counter] = True
 def get_hosts(self, program, programs_metadata):
     """
     Returns the list of hosts that should run the given program.
     Arguments:
         * program - Program name, "start_server" or "start_client".
         * programs_metadata - Mapping from program name to its metadata;
         the "hosts" entry of the program is read.
     For "start_server" only the first configured host is returned; for
     "start_client" the configured hosts are handed to
     self.get_iteration_clients.  Any other name logs an error and exits.
     """
     if program not in ("start_server", "start_client"):
         utils.error("Unknown program name: {}".format(program))
         exit(1)
     candidates = programs_metadata[program]["hosts"]
     if program == "start_server":
         return [candidates[0]]
     return self.get_iteration_clients(candidates)
 def find_rate(self, client_options, host):
     """
     Returns the rate assigned to a client host.
     Arguments:
         * client_options - Ordered list of client hosts.
         * host - Host whose rate is wanted.
     self.client_rates is read as a sequence of (rate, count) entries; each
     rate is repeated count times, and the position of host inside
     client_options picks the rate.  Logs an error and exits if the host is
     not listed or has no rate at its position.
     """
     # One entry per client slot: (rate, count) -> rate repeated count times.
     rates = [
         info[0] for info in self.client_rates for _ in range(info[1])
     ]
     try:
         return rates[client_options.index(host)]
     except (ValueError, IndexError):
         # ValueError: host is not in client_options.
         # IndexError: fewer rates than client options.
         utils.error("Host {} not found in client options {}.".format(
             host, client_options))
         exit(1)
 def get_iterations(self, total_args):
     """
     Builds the list of ScatterGatherIteration objects to run.
     Arguments:
         * total_args - Parsed arguments; exp_type selects the mode.
     For exp_type "individual" a single iteration is built from the
     command line values, with its trial set to the number of trials
     already recorded under the iteration's parent folder.  Otherwise a
     sweep over trials, segment sizes and mbuf counts is generated, with
     three iterations (no copy, with copy, with copy as one buffer) for
     each combination.
     """
     if total_args.exp_type == "individual":
         # The config value may not be an int (e.g. a string from the
         # yaml), so convert before comparing.
         if total_args.num_clients > int(self.config_yaml["max_clients"]):
             utils.error(
                 "Cannot have {} clients, greater than max {}".format(
                     total_args.num_clients,
                     self.config_yaml["max_clients"]))
             exit(1)
         client_rates = [(total_args.rate, total_args.num_clients)]
         it = ScatterGatherIteration(client_rates, total_args.segment_size,
                                     total_args.num_mbufs,
                                     total_args.with_copy,
                                     total_args.as_one)
         num_trials_finished = utils.parse_number_trials_done(
             it.get_parent_folder(total_args.folder))
         it.set_trial(num_trials_finished)
         return [it]

     # (with_copy, as_one) variants generated for every sweep point, in
     # the order they are appended to the result.
     copy_variants = ((False, False), (True, False), (True, True))
     ret = []
     for trial in range(utils.NUM_TRIALS):
         for segment_size in SEGMENT_SIZES_TO_LOOP:
             for num_mbufs in range(1, MBUFS_MAX + 1):
                 # Rate for the total payload at MAX_RATE_GBPS, capped
                 # at MAX_RATE_PPS.
                 rate = min(
                     MAX_RATE_PPS,
                     utils.get_tput_pps(MAX_RATE_GBPS,
                                        segment_size * num_mbufs))
                 for with_copy, as_one in copy_variants:
                     ret.append(
                         ScatterGatherIteration([(rate, 1)],
                                                segment_size,
                                                num_mbufs,
                                                with_copy,
                                                as_one,
                                                trial=trial))
     return ret
 def get_program_args(self, folder, program, host, config_yaml,
                      programs_metadata, exp_time):
     """
     Builds the dictionary of values for a program's command line.
     Arguments:
         * folder - Folder for this iteration; stored as a string.
         * program - "start_server" or "start_client".
         * host - Host the program will run on.
         * config_yaml - Config with "cornflakes_dir" and per-host
         "ip" / "mac" entries under "hosts".
         * programs_metadata - Mapping from program name to metadata with
         a "hosts" list.
         * exp_time - Value stored under "time" for the client.
     Any other program name logs an error and exits.
     """
     if program == "start_server":
         return {
             "cornflakes_dir": config_yaml["cornflakes_dir"],
             "server_ip": config_yaml["hosts"][host]["ip"],
             "with_copy": " --with_copy" if self.with_copy else "",
             "folder": str(folder),
         }

     if program != "start_client":
         utils.error("Unknown program name: {}".format(program))
         exit(1)

     copying = self.with_copy
     args = {"with_copy": " --with_copy" if copying else ""}
     if copying and self.as_one:
         # as_one: a single buffer carrying the whole payload
         args["as_one"] = "as_one"
         args["segment_size"] = self.segment_size * self.num_mbufs
         args["num_mbufs"] = 1
     else:
         args["segment_size"] = self.segment_size
         args["num_mbufs"] = self.num_mbufs

     # the client's rate depends on its position among this iteration's
     # clients
     eligible = self.get_iteration_clients(
         programs_metadata[program]["hosts"])
     client_rate = self.find_rate(eligible, host)
     server_host = programs_metadata["start_server"]["hosts"][0]
     args.update({
         "cornflakes_dir": config_yaml["cornflakes_dir"],
         "server_ip": config_yaml["hosts"][server_host]["ip"],
         "host_ip": config_yaml["hosts"][host]["ip"],
         "server_mac": config_yaml["hosts"][server_host]["mac"],
         "rate": client_rate,
         "time": exp_time,
         "latency_log": "{}.latency.log".format(host),
         "host": host,
         "folder": str(folder),
     })
     return args
Beispiel #6
0
    def get_program_args(self, folder, program, host, config_yaml,
                         programs_metadata, exp_time):
        """
        Builds the dictionary of values for a program's command line.
        Arguments:
            * folder - Folder for this iteration; stored as a string.
            * program - "start_server" or "start_client".
            * host - Host the program will run on.
            * config_yaml - Config with "cornflakes_dir", "config_file" and
            per-host "ip" entries under "hosts".
            * programs_metadata - Mapping from program name to metadata
            with a "hosts" list.
            * exp_time - Value stored under "time" for the client.
        Any other program name logs an error and exits.
        """
        # values shared by server and client
        args = {
            "cornflakes_dir": config_yaml["cornflakes_dir"],
            "config_file": config_yaml["config_file"],
            "library": self.serialization,
            "folder": str(folder),
            "server_message": self.server_message,
            "message": self.message_type,
            "server_size": self.server_size,
            "size": self.size,
        }

        if program == "start_server":
            # at most one of the two receive flags is set on the server
            mode = self.recv_mode
            args["zero_copy_recv"] = " -z" if mode == "zero_copy_recv" else ""
            args["copy_to_dma_memory"] = (
                "--copy_to_dma_memory" if mode == "copy_to_dma_memory" else "")
            return args

        if program == "start_client":
            args["zero_copy_recv"] = " -z"  # client always uses zero_copy_recv
            args["copy_to_dma_memory"] = ""
            # the client's rate depends on its position among the clients
            clients = self.get_iteration_clients(
                programs_metadata[program]["hosts"])
            args["rate"] = self.find_rate(clients, host)
            # the server is the first host listed for start_server
            server_host = programs_metadata["start_server"]["hosts"][0]
            args["server_ip"] = config_yaml["hosts"][server_host]["ip"]
            args["time"] = exp_time
            args["host"] = host
            return args

        utils.error("Unknown program name: {}".format(program))
        exit(1)
 def get_trial_string(self):
     """
     Returns the label "trial_<n>" for this iteration's trial number.
     Logs an error and exits if self.trial has not been set (is None).
     """
     # Identity check: trial 0 is valid and must not be treated as unset.
     if self.trial is None:
         utils.error("TRIAL IS NOT SET FOR ITERATION.")
         exit(1)
     return "trial_{}".format(self.trial)
Beispiel #8
0
    def get_iterations(self, total_args):
        """
        Builds the list of DsQueryIteration objects to run.
        Arguments:
            * total_args - Parsed arguments; exp_type selects the mode.
        For exp_type "individual" one iteration is built from the command
        line values.  With analysis_only or graph_only set, one copy per
        already recorded trial is returned; otherwise the single iteration
        is returned with its trial set to the number of recorded trials.
        For any other exp_type, a sweep over trials, message types, sizes,
        serialization libraries and client rates is generated.
        """
        if total_args.exp_type == "individual":
            max_clients = self.config_yaml["max_clients"]
            if total_args.num_clients > int(max_clients):
                utils.error(
                    "Cannot have {} clients, greater than max {}".format(
                        total_args.num_clients, max_clients))
                exit(1)
            base = DsQueryIteration(
                [(total_args.rate, total_args.num_clients)],
                total_args.server_size, total_args.size,
                total_args.serialization, total_args.server_message_type,
                total_args.message_type, total_args.recv_mode)
            done = utils.parse_number_trials_done(
                base.get_parent_folder(total_args.folder))
            if not (total_args.analysis_only or total_args.graph_only):
                base.set_trial(done)
                return [base]
            # analysis / graphing: one iteration per recorded trial
            clones = []
            for trial_idx in range(done):
                clone = copy.deepcopy(base)
                clone.set_trial(trial_idx)
                clones.append(clone)
            return clones

        def rate_ladder():
            # for client rates:
            # do some testing to determine optimal rates
            # TODO: how do we get "mid range"
            # e.g.: points that exactly determine
            # where the knee is
            limit = int(self.config_yaml["max_clients"])
            ladder = [[(pps, 1)] for pps in (24000, 48000, 72000, 96000)]
            ladder.extend([(100000, n)] for n in range(2, limit))
            ladder.extend([(120000, n)] for n in range(1, limit))
            return ladder

        # combinations that are skipped for 8192 byte "tree-5" messages
        skipped_libraries = ("cornflakes-dynamic", "cornflakes-1cdynamic")
        client_message_type = "single"
        recv_mode = "zero_copy_recv"

        ret = []
        for trial in range(3):
            for server_message_type in MESSAGE_TYPES:
                for server_size in SIZES_TO_LOOP:
                    for client_size in CLIENT_SIZES_TO_LOOP:
                        if client_size > server_size:
                            continue
                        for serialization in SERIALIZATION_LIBRARIES:
                            if (server_size == 8192
                                    and server_message_type == "tree-5"
                                    and serialization in skipped_libraries):
                                continue
                            # a fresh ladder for every combination
                            for rate in rate_ladder():
                                ret.append(
                                    DsQueryIteration(rate,
                                                     server_size,
                                                     client_size,
                                                     serialization,
                                                     server_message_type,
                                                     client_message_type,
                                                     recv_mode,
                                                     trial=trial))
        return ret
Beispiel #9
0
    def run(self,
            folder,
            exp_config,
            machine_config,
            pprint,
            program_version_info,
            use_perf=False):
        """
        Runs the actual program.
        Arguments:
            * folder - Path that all logfiles from this iteration should go.
            * exp_config - Experiment yaml that contains command lines. Assumes
            this contains a set of programs to run, each with a list of
            corresponding hosts that can run that command line.
            * machine_config - Machine level config yaml.
            * pprint - Instead of running, just print out command lines.
            * program_version_info - Metadata about the commit version of the
            repo at time of experiment.
            * use_perf - Whether to use perf or not when running the server.
        Returns:
            False if any program that is joined without a stop command
            reported a failing status (or never reported one), True
            otherwise.  With pprint set nothing is started and True is
            returned.
        """
        # (host, program id) for programs that finish on their own.  Lists
        # rather than host-keyed dicts, so several programs on one host are
        # all joined / killed.
        programs_to_join_immediately = []
        # (host, program id, kill command) for programs stopped explicitly
        programs_to_kill = []
        # map from start time (in seconds) to list
        # of programs with that start time
        programs_by_start_time = defaultdict(list)

        # assumes program processes to be executed are in order in the yaml
        commands = exp_config["commands"]
        programs = exp_config["programs"]
        exp_time = exp_config["time"]

        # map from record file path to the metadata written there
        record_paths = {}

        # map from a program id to the actual process
        program_counter = 0
        proc_map = {}
        manager = mp.Manager()
        # shared with the worker processes, which store their status in it
        status_dict = manager.dict()
        # spawn the commands
        for command in commands:
            program_name = command["program"]
            program = programs[program_name]
            # Keep the unformatted stop template: it is formatted once per
            # host below, so every host gets its own stop command.
            kill_template = program["stop"] if "stop" in program else None
            log_config = program["log"] if "log" in program else {}
            for host in self.get_relevant_hosts(program, program_name):
                program_cmd = program["start"]
                if "out" in log_config:
                    program_cmd += " > {}".format(log_config["out"])
                if "err" in log_config:
                    program_cmd += " 2> {}".format(log_config["err"])

                program_args = self.get_program_args(folder, program_name,
                                                     host, machine_config,
                                                     programs, exp_time)
                program_cmd = program_cmd.format(**program_args)
                if use_perf and "perf" in program:
                    utils.debug(
                        "current program args: {}".format(program_args))
                    perf_cmd = program["perf"].format(**program_args)
                    program_cmd = "{} {}".format(perf_cmd, program_cmd)

                # A program without a "record" log entry writes no metadata
                # file (instead of reusing a previous program's path).
                record_path = None
                if "record" in log_config:
                    record_path = log_config["record"].format(**program_args)

                kill_cmd = None
                fail_ok = False
                if kill_template is not None:
                    kill_cmd = kill_template.format(**program_args)
                    # a program that gets killed is allowed to fail
                    fail_ok = True

                yaml_record = {
                    "host": host,
                    "args": program_args,
                    "command": program_cmd,
                    "stop_command": kill_cmd,
                    "version_info": program_version_info
                }

                if pprint:
                    utils.debug(
                        "Host {}: \n\t - Running Cmd: {}\n\t - Stopped by: {}".
                        format(host, program_cmd, kill_cmd))

                else:
                    if record_path is not None:
                        record_paths[record_path] = yaml_record
                    proc = mp.Process(target=self.run_cmd_sudo,
                                      args=(program_cmd, host, machine_config,
                                            fail_ok, status_dict,
                                            program_counter))

                    start_time = int(command["begin"])
                    proc_map[program_counter] = proc
                    programs_by_start_time[start_time].append(
                        (kill_cmd, program_counter, program_name, host,
                         program_args))
                    program_counter += 1

        # now start each start program
        cur_time = 0
        for start_time in sorted(programs_by_start_time):
            if start_time != cur_time:
                time.sleep(start_time - cur_time)
            cur_time = start_time
            for info in programs_by_start_time[start_time]:
                (kill_cmd, prog_id, program_name, host, program_args) = info
                utils.debug("Starting program {} on host {}, args: {}".format(
                    program_name, host, program_args))
                proc_map[prog_id].start()
                if kill_cmd is None:
                    programs_to_join_immediately.append((host, prog_id))
                else:
                    programs_to_kill.append((host, prog_id, kill_cmd))

        any_failed = False
        # now join all of the joining programs
        for (host, prog_id) in programs_to_join_immediately:
            proc_map[prog_id].join()
            # A worker that died before storing a status leaves no entry;
            # count that as a failure.
            status = status_dict.get(prog_id, False)
            if not status:
                any_failed = True
            utils.debug("Host {} done; status: {}".format(host, status))

        # now kill the rest of the programs
        for (host, prog_id, kill_cmd) in programs_to_kill:
            kill_cmd_with_sleep = kill_cmd + "; /bin/sleep 3"
            try:
                utils.debug("Trying to run kill command: {} on host {}".format(
                    kill_cmd, host))
                self.kill_remote_process(kill_cmd_with_sleep, host,
                                         machine_config)
            except Exception:
                utils.warn("Failed to run kill command: {}".format(
                    kill_cmd_with_sleep))
                exit(1)
            try:
                proc_map[prog_id].join()
            except Exception:
                utils.warn("Failed to run join command: {}".format(prog_id))

        # now, experiment is over, so record experiment metadata
        for record_path, yaml_record in record_paths.items():
            # the with block closes the file
            with open(record_path, 'w') as record_file:
                yaml.dump(yaml_record, record_file)

        if any_failed:
            utils.error("One of the programs failed.")
            return False
        return True
Beispiel #10
0
    def get_iterations(self, total_args):
        """
        Builds the list of EchoBenchIteration objects to run.
        Arguments:
            * total_args - Parsed arguments; exp_type selects the mode.
        For exp_type "individual" one iteration is built from the command
        line values.  With analysis_only or graph_only set, one copy per
        already recorded trial is returned; otherwise the single iteration
        is returned with its trial set to the number of recorded trials.
        For any other exp_type, a sweep over trials, message types, sizes,
        serialization libraries, receive modes and client rates is
        generated.
        """
        if total_args.exp_type == "individual":
            configured_max = self.config_yaml["max_clients"]
            if total_args.num_clients > int(configured_max):
                utils.error(
                    "Cannot have {} clients, greater than max {}".format(
                        total_args.num_clients, configured_max))
                exit(1)
            template = EchoBenchIteration(
                [(total_args.rate, total_args.num_clients)],
                total_args.size, total_args.serialization,
                total_args.message_type, total_args.recv_mode)
            finished = utils.parse_number_trials_done(
                template.get_parent_folder(total_args.folder))
            if total_args.analysis_only or total_args.graph_only:
                # one iteration per trial that already has results
                per_trial = []
                for trial_idx in range(finished):
                    duplicate = copy.deepcopy(template)
                    duplicate.set_trial(trial_idx)
                    per_trial.append(duplicate)
                return per_trial
            template.set_trial(finished)
            return [template]

        # loop over the options
        single_client_pps = (24000, 48000, 72000, 96000)
        excluded_for_large_tree = ("cornflakes-dynamic",
                                   "cornflakes-1cdynamic")
        ret = []
        # for trial in range(utils.NUM_TRIALS):
        for trial in range(3):
            for message_type in MESSAGE_TYPES:
                for size in SIZES_TO_LOOP:
                    for serialization in SERIALIZATION_LIBRARIES:
                        large_tree = (size == 8192
                                      and message_type == "tree-5")
                        if large_tree and \
                                serialization in excluded_for_large_tree:
                            continue
                        for recv_mode in RECV_TYPES:
                            # for client rates:
                            # for now loop over 2 rates and 1-2 machines
                            # do some testing to determine optimal rates
                            limit = int(self.config_yaml["max_clients"])
                            client_rates = (
                                [[(pps, 1)] for pps in single_client_pps] +
                                [[(100000, n)] for n in range(2, limit)] +
                                [[(120000, n)] for n in range(1, limit)])
                            for rate in client_rates:
                                ret.append(
                                    EchoBenchIteration(rate,
                                                       size,
                                                       serialization,
                                                       message_type,
                                                       recv_mode,
                                                       trial=trial))
        return ret