def management_post():
    global coordinator_db, username
    print "Management post"

    if bottle.request.POST.kill_themis or bottle.request.POST.remount_disks or bottle.request.POST.format_disks:
        return_code, stdout, stderr = parallel_ssh(
            coordinator_db.redis_client,
            "pkill -f mapreduce\|cluster_coordinator.py\|node_coordinator.py\|"
            "resource_monitor_gui.py\|job_status.py",
            username,
            None,
            False,
            True,
        )

    if bottle.request.POST.remount_disks:
        return_code, stdout, stderr = parallel_ssh(
            coordinator_db.redis_client, MOUNT_SCRIPT, username, None, False, False
        )

    if bottle.request.POST.format_disks:
        return_code, stdout, stderr = parallel_ssh(
            coordinator_db.redis_client, "%s --format_disks" % MOUNT_SCRIPT, username, None, False, False
        )

    updated = False
    if bottle.request.POST.update_repo:
        recompile = bottle.request.POST.getall("recompile")
        recompile_option = ""
        if len(recompile) == 1 and recompile[0] == "True":
            recompile_option = "-r"

        branch = bottle.request.POST.branch
        return_code, stdout, stderr = parallel_ssh(
            coordinator_db.redis_client,
            "%s -b %s %s" % (UPDATE_SCRIPT, branch, recompile_option),
            username,
            None,
            False,
            True,
        )

        updated = True

    return management(updated=updated)
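
# A minimal wiring sketch, not part of the original source: management_post() is
# written as a bottle POST handler, so it would normally be bound to the same URL
# as the management() GET view it re-renders. The "/management" path and the
# host/port below are assumptions for illustration only.
#
#     bottle.post("/management")(management_post)
#     bottle.run(host="0.0.0.0", port=4280)
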
def dfs_lookup(path):
    global coordinator_db, jinja_env, disk_mountpoint, username
    if path == None:
        path = disk_mountpoint

    # Make path canonical
    if path[0] != "/":
        path = "/%s" % path

    print path
    path = os.path.expanduser(path)
    print path
    path = os.path.realpath(path)

    if path[len(path) - 1] != "/":
        path = "%s/" % path

    print "DFS lookup for %s" % path

    # Make sure path is within our valid disk directory
    if path[0 : len(disk_mountpoint)] != disk_mountpoint:
        print "Illegal prefix for path %s" % path
        return

    return_code, stdout, stderr = parallel_ssh(
        coordinator_db.redis_client,
        'find %s -maxdepth 1 -mindepth 1 -printf "%%f,%%y,%%s\\n"' % path,
        username,
        None,
        False,
        False,
    )

    directories = set()
    files = []
    total_size = 0
    for host, results in stdout.iteritems():
        results = results.split("\n")
        results = [x for x in results if len(x) > 0]

        for result in results:
            file_name, file_type, file_size = tuple(result.split(","))
            if file_type == "d":
                directories.add(file_name)
            elif file_type == "f":
                files.append((file_name, file_size, host))
                total_size += int(file_size)

    directories = sorted(directories)
    files = sorted(files, key=lambda x: x[0])

    # Add a wildcard for disks so we can explore across all disks
    if path == disk_mountpoint:
        directories.append("disk_*")

    return (directories, files, total_size, path)
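
# Usage sketch (an assumption, not from the original source): dfs_lookup()
# returns (directories, files, total_size, path), where files is a list of
# (name, size, host) tuples aggregated across the cluster, or None if the path
# falls outside disk_mountpoint. print_dfs_listing is a hypothetical helper that
# shows how a caller might consume that tuple.
def print_dfs_listing(path=None):
    result = dfs_lookup(path)
    if result is None:
        print "Path is outside the DFS mountpoint"
        return
    directories, files, total_size, listed_path = result
    print "Listing for %s (%d bytes in files)" % (listed_path, total_size)
    for directory in directories:
        print "  %s/" % directory
    for file_name, file_size, host in files:
        print "  %s  %s bytes  (on %s)" % (file_name, file_size, host)
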
def run_benchmark(
    binary,
    config,
    batch_directory,
    phase_directory,
    profiler,
    profiler_options,
    peer_list,
    node_list,
    per_peer_config,
    dump_core_directory,
    solo_mode,
    vnstat_interface,
    params,
):

    # Get ssh username.
    username = read_conf_file("cluster.conf", "cluster", "username")

    # Add command line parameters to binary
    binary = "%s -LOG_DIR %s" % (binary, phase_directory)

    if dump_core_directory is not None:
        binary = "cd %s; ulimit -c unlimited; %s" % (dump_core_directory, binary)

    processes = []
    start_time = time.time()
    for index, ip in enumerate(node_list):
        # Now start themis binaries
        if solo_mode:
            # Act as if you are the only peer in the cluster.
            peer_binary = "%s -PEER_LIST %s" % (binary, ip)
        else:
            # Use the entire set of peers and designate yourself as
            # one of them.
            peer_binary = "%s -PEER_LIST %s -MYPEERID %d" % (binary, peer_list, index)

        if per_peer_config:
            # Append the IP address to the config file name
            peer_binary = "%s -CONFIG %s.%s" % (peer_binary, config, ip)
        else:
            peer_binary = "%s -CONFIG %s" % (peer_binary, config)

        # Override config file with specified parameters
        if params:
            peer_binary = "%s %s" % (peer_binary, params)

        if profiler == "operf":
            # Use the batch directory as the operf session dir
            session_dir = os.path.join(batch_directory, "oprofile_data.%s" % ip)
            parallel_ssh(None, "mkdir -p %s" % session_dir, username, node_list, False, True, False)
            peer_binary = "%s %s --session-dir=%s %s" % (profiler, profiler_options, session_dir, peer_binary)
        elif profiler is not None:
            # Some other profiler, just prepend it to the binary
            peer_binary = "%s %s %s" % (profiler, profiler_options, peer_binary)

        # Run the node-local benchmark script.
        vnstat_param_string = ""
        if vnstat_interface != None:
            vnstat_param_string = "--vnstat_interface %s" % vnstat_interface
        command = "%s %s \"%s/run_benchmark_local.py %s %s '%s'\"" % (
            ssh_command(),
            ip,
            THEMIS_SCRIPTS_DIR,
            vnstat_param_string,
            phase_directory,
            peer_binary,
        )

        processes.append((subprocess.Popen(command, shell=True), ip))

    print "%d tasks launched on %s\n" % (len(processes), time.asctime())

    elapsed_times = []
    completed_ips = []

    num_nodes = len(processes)

    # Poll the launched ssh subprocesses until every node's benchmark script has
    # exited. The scan restarts from the top after each completion so the
    # (process, ip) list can be mutated safely; note that it does not sleep
    # between polling passes.
    while len(processes) > 0:
        for process, ip in processes:
            process.poll()
            if process.returncode != None:
                elapsed_time = time.time() - start_time
                process.communicate()
                processes.remove((process, ip))
                elapsed_times.append(elapsed_time)
                completed_ips.append(ip)
                print "Node %s completed in %.2f seconds (%d / %d)" % (ip, elapsed_time, len(elapsed_times), num_nodes)

                break

    stop_time = time.time()

    return (stop_time - start_time, elapsed_times, completed_ips)
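
# A hedged invocation sketch, not taken from the original source: every literal
# below is a placeholder, and run_benchmark() launches real ssh sessions, so the
# call is shown commented out.
#
#     total_elapsed, node_times, finished_ips = run_benchmark(
#         binary="/opt/themis/mybenchmark",       # assumed binary path
#         config="benchmark.yaml",
#         batch_directory="/logs/batch_0",
#         phase_directory="/logs/batch_0/phase_one",
#         profiler=None,
#         profiler_options=None,
#         peer_list="10.0.0.1,10.0.0.2",
#         node_list=["10.0.0.1", "10.0.0.2"],
#         per_peer_config=False,
#         dump_core_directory=None,
#         solo_mode=False,
#         vnstat_interface=None,
#         params="")
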
def run_benchmark_iterations(
    binary,
    log_directory,
    config,
    peer_ips,
    profiler,
    profiler_options,
    iterations,
    sleep,
    delete_output,
    per_peer_config,
    dump_core_directory,
    solo_mode,
    stage_stats,
    interfaces,
    params="",
):

    # Get ssh username and themis directory
    username, themis_directory = read_conf_file("cluster.conf", "cluster", ["username", "themis_directory"])
    themis_directory = os.path.expanduser(themis_directory)
    # Get cloud provider if applicable.
    provider = read_conf_file("cluster.conf", "cluster", "provider")

    if interfaces == None:
        vnstat_interface = None
    else:
        interface_list = filter(lambda x: len(x) > 0, interfaces.split(","))
        vnstat_interface = interface_list[0]

    if not os.path.exists(config):
        sys.exit("Config file %s does not exist." % config)

    with open(config, "r") as fp:
        app_config = yaml.load(fp)

    # If we're using more than 1 network interface per peer, the peer list is
    # going to look like:
    # Peer1_interface1, Peer1_interface2, Peer2_interface1, Peer2_interface2, ..
    # In this case, we only want to launch the benchmark once per peer, so
    # make sure we only look at the first interface for each peer, and let
    # the application itself deal with the other interfaces.
    num_interfaces = 1
    if "NUM_INTERFACES" in app_config:
        num_interfaces = app_config["NUM_INTERFACES"]

    # Remove trailing comma if any from the IP list. This will be the string we
    # pass into the benchmark binary.
    peer_list = peer_ips.rstrip(",")

    # If we're using multiple interfaces, only launch the benchmark once per
    # node.
    node_list = peer_list.split(",")[::num_interfaces]

    # Look for description files in the same directory as the binary.
    binary_dir = os.path.dirname(binary)
    description_directory = os.path.join(binary_dir, "description")

    if not os.path.exists(description_directory):
        sys.exit("Could not find description directory %s" % (description_directory))

    # Check for the phase name. For simplicity we're going to require that
    # the benchmark have only 1 phase
    description = Description(description_directory)
    phases = description.getPhaseList()
    if len(phases) != 1:
        sys.exit("Benchmark must have exactly one phase. Got %s" % phases)
    phase_name = phases[0]

    data_size_per_node = int(app_config["BENCHMARK_DATA_SIZE_PER_NODE"][phase_name])
    data_size = data_size_per_node * len(node_list)

    total_throughputs = {}
    if stage_stats is not None:
        stage_stats = stage_stats.split(",")
        for stage in stage_stats:
            total_throughputs[stage] = 0.0

    node_benchmark_throughputs = []

    for i in xrange(iterations):
        # Pick a unique batch ID
        batch = 0
        while os.path.exists(os.path.join(log_directory, "batch_%d" % batch)):
            batch += 1
        batch_directory = os.path.join(log_directory, "batch_%d" % batch)

        # Create directories
        phase_directory = os.path.join(batch_directory, phase_name)
        parallel_ssh(None, "mkdir -p %s" % phase_directory, username, node_list, False, True, False)

        # Copy description files and create phase directory.
        if not os.path.exists(batch_directory):
            os.makedirs(batch_directory)
        shutil.copy(os.path.join(description_directory, "stages.json"), batch_directory)
        shutil.copy(os.path.join(description_directory, "structure.json"), batch_directory)
        os.chmod(os.path.join(batch_directory, "stages.json"), 0777)
        os.chmod(os.path.join(batch_directory, "structure.json"), 0777)

        # Copy config file
        shutil.copyfile(config, os.path.join(batch_directory, "config.yaml"))

        print "\nLogging to %s" % (batch_directory)
        print "Running %s with batch ID %d on %d nodes..." % (phase_name, batch, len(node_list))

        (elapsed, elapsed_times, completed_ips) = run_benchmark(
            binary,
            config,
            batch_directory,
            phase_directory,
            profiler,
            profiler_options,
            peer_list,
            node_list,
            per_peer_config,
            dump_core_directory,
            solo_mode,
            vnstat_interface,
            params,
        )

        # Compute overall throughput
        throughput = (data_size / elapsed) / 1000000
        per_node_throughput = (data_size_per_node / elapsed) / 1000000
        print "Completed in %.2f seconds." % elapsed
        print "  Throughput: %.2f MB/s" % throughput
        print "  Per-server: %.2f MB/s" % per_node_throughput

        # Record individual throughputs
        throughputs = [(data_size_per_node / x) / 1000000 for x in elapsed_times]
        node_benchmark_throughputs += throughputs

        # Dump these results to a file in the batch directory
        results_file = open(os.path.join(batch_directory, "results"), "w")
        results_file.write(
            "Runtime: %.2f seconds\nThroughput: %.2f MB/s\nPer-server: "
            "%.2f MB/s\n\n" % (elapsed, throughput, per_node_throughput)
        )
        results_file.write("Node throughputs: %s\n\n" % throughputs)
        for ip, elapsed_time, throughput in zip(completed_ips, elapsed_times, throughputs):
            results_file.write("Node %s completed in %.2f seconds (%.2f MB/s)\n" % (ip, elapsed_time, throughput))
        results_file.write("\n")

        if stage_stats is not None:
            # Compute runtime stat throughputs

            done = False
            while not done:
                # Upload all logs.
                upload_logs()

                # Download logs locally.
                download_logs()

                try:
                    runtime_info = gather_runtime_info(batch_directory, False)
                    done = True
                except ValueError:
                    print "Runtime info script failed. Retrying log upload/downloads."

            stage_info = runtime_info[0]["stages"]
            node_throughputs = {}
            for worker_info in stage_info:
                stats_info = worker_info["stats_info"]
                # We only want to look at the overall stats, which includes all
                # nodes (hostname or worker ID won't be specified)
                if len(stats_info) == 1:
                    stage_name = stats_info["stage"]

                    if stage_name in stage_stats:
                        # This is one of the stages we care about
                        node_throughputs[stage_name] = worker_info["observed_processing_rate_per_node"]
                        total_throughputs[stage_name] += node_throughputs[stage_name]

            # Print throughputs in the correct order.
            for stage_name in stage_stats:
                print "  %s throughput: %.2f MB/s/node" % (stage_name, node_throughputs[stage_name])
                results_file.write("%s throughput: %.2f MB/s\n" % (stage_name, node_throughputs[stage_name]))

        results_file.close()

        if delete_output and "OUTPUT_DISK_LIST" in app_config and phase_name in app_config["OUTPUT_DISK_LIST"]:
            output_disk_list = app_config["OUTPUT_DISK_LIST"][phase_name]
            output_disks = output_disk_list.split(",")
            for disk in output_disks:
                print "Clearing %s" % disk
                parallel_ssh(None, "rm -rf %s" % disk, username, node_list, False, False, False)

        if sleep > 0 and i != iterations - 1:
            print "Sleeping %d seconds" % sleep
            time.sleep(sleep)

    print "\nCompleted %d iterations\n" % iterations
    # Format node throughputs
    node_benchmark_throughput_strings = ["%.2f" % x for x in node_benchmark_throughputs]
    print "  Node throughputs (MB/s):"
    print "    %s" % node_benchmark_throughput_strings
    print "  Average node throughput: %.2f MB/s" % (numpy.mean(node_benchmark_throughputs))
    print "  Standard deviation: %.2f MB/s" % (numpy.std(node_benchmark_throughputs))
    print "  Min node throughput: %.2f MB/s" % (numpy.min(node_benchmark_throughputs))
    print "  Max node throughput: %.2f MB/s\n" % (numpy.max(node_benchmark_throughputs))

    if stage_stats is not None:
        for stage_name in stage_stats:
            print "  Average %s throughput: %.2f MB/s/node" % (stage_name, total_throughputs[stage_name] / iterations)
def set_ip_addresses(redis_client, nodes, interface_names, username):
    # Given a list of nodes, ssh into each and get the IP address corresponding
    # to the selected interfaces.
    ips = {}
    return_code = 0
    for node in nodes:
        ips[node] = []
    for interface in interface_names:

        # Get the IP address for this interface to be used in Themis Centos 7
        command = "/sbin/ip -o -4 addr list %s | awk \'{print $4}\' | cut -d/ -f1" % interface

        # Get the IP address for this interface to be used in Themis
        #command = "/sbin/ifconfig %s | sed \'/inet addr/!d;s/.*addr:\(.*\) "\
        #    "Bcast.*$/\\1/\'" % interface

        return_code, stdout_ifconfig, stderr = parallel_ssh(
            redis_client, command, username, nodes, ignore_bad_hosts=True,
            master=False, verbose=False)
        if return_code != 0:
            print >>sys.stderr, "Failed parallel ssh of command '%s'" % command
            break

        for node in nodes:
            # Get IP address of this interface from stdout
            # Make sure the command actually succeeded
            ips[node].append(stdout_ifconfig[node].strip())

    # Get internal/external IP and hostname for cluster display purposes.
    command = "/bin/grep internal_dns ~/node.conf | awk -F= \'{print $2}\' | tr -d \" \""

    return_code, stdout_internal_dns, stderr = parallel_ssh(
        redis_client, command, username, nodes, ignore_bad_hosts=True,
        master=False, verbose=False)
    if return_code != 0:
        print >>sys.stderr, "Failed parallel ssh of command '%s'" % command

    command = "/bin/grep external_dns ~/node.conf | awk -F= \'{print $2}\' | tr -d \" \""

    return_code, stdout_external_dns, stderr = parallel_ssh(
        redis_client, command, username, nodes, ignore_bad_hosts=True,
        master=False, verbose=False)
    if return_code != 0:
        print >>sys.stderr, "Failed parallel ssh of command '%s'" % command

    command = "/bin/grep internal_ip ~/node.conf | awk -F= \'{print $2}\' | tr -d \" \""

    return_code, stdout_internal_ip, stderr = parallel_ssh(
        redis_client, command, username, nodes, ignore_bad_hosts=True,
        master=False, verbose=False)
    if return_code != 0:
        print >>sys.stderr, "Failed parallel ssh of command '%s'" % command

    command = "/bin/grep external_ip ~/node.conf | awk -F= \'{print $2}\' | tr -d \" \""

    return_code, stdout_external_ip, stderr = parallel_ssh(
        redis_client, command, username, nodes, ignore_bad_hosts=True,
        master=False, verbose=False)
    if return_code != 0:
        print >>sys.stderr, "Failed parallel ssh of command '%s'" % command

    for node in nodes:
        # hostname and ipv4_address maps are used for file locations, so just
        # use the first interface
        if len(ips[node]) > 0:
            redis_client.hset("ipv4_address", node, ips[node][0])
        # Add a reverse hostname map
        for ip in ips[node]:
            redis_client.hset("hostname", ip, node)

        # Add external/internal dns/ip for cluster monitor
        redis_client.hset(
            "internal_dns", node, stdout_internal_dns[node].strip())
        redis_client.hset(
            "external_dns", node, stdout_external_dns[node].strip())
        redis_client.hset(
            "internal_ip", node, stdout_internal_ip[node].strip())
        redis_client.hset(
            "external_ip", node, stdout_external_ip[node].strip())

        # Add all IP addresses to the interfaces map
        redis_client.hset("interfaces", node, ",".join(ips[node]))

    return return_code
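
# Usage sketch (an assumption, not from the original source): set_ip_addresses()
# takes a redis connection, the node hostnames, and the interface names to query,
# and populates the ipv4_address/hostname/interfaces and dns/ip hashes in redis.
# The redis location, node names, and interface name below are placeholders, and
# the call contacts every node over ssh, so it is shown commented out.
#
#     import redis
#     redis_client = redis.StrictRedis(host="localhost", port=6379, db=0)
#     if set_ip_addresses(redis_client, ["node-0", "node-1"], ["eth0"], "themis") != 0:
#         sys.exit("Failed to collect IP addresses")
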