    def __init__(self, log_dir='logs'):
        """
        @param log_dir: Top-level directory in which to write log files.
                        A separate subdirectory will be created in this
                        directory for the log files from this run. This can
                        only be overridden by passing it to __init__() since
                        that method creates the subdirectory.
                        (default: logs)
        """
        self.log_level = 'NOTICE'
        self.verbose = False
        self.transport = 'infrc'
        self.replicas = 3
        self.disk = default_disk1

        self.coordinator = None
        self.next_server_id = 0
        self.next_client_id = 0
        self.masters_started = 0
        self.backups_started = 0

        self.log_subdir = log.createDir(log_dir)
        self.sandbox = Sandbox()
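A minimal usage sketch, assuming this __init__ belongs to a Cluster-style class and that log.createDir and Sandbox come from the surrounding scripts; the class name Cluster is an assumption here.

# Hypothetical usage; 'Cluster' stands in for the enclosing class.
cluster = Cluster(log_dir='logs')
cluster.replicas = 2       # defaults can be overridden after construction
cluster.verbose = True
print(cluster.log_subdir)  # per-run subdirectory created by log.createDir()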
Example #2
def run(
    num_servers=4,  # Number of hosts on which to start
    # servers (not including coordinator).
    backups_per_server=1,  # Number of backups to run on each
    # server host (0, 1, or 2).
    replicas=3,  # Replication factor to use for each
    # log segment.
    disk1=default_disk1,  # Server arguments specifying the
    # backing device for the first backup
    # on each server.
    disk2=default_disk2,  # Server arguments specifying the
    # backing device for the second backup
    # on each server (if backups_per_server == 2).
    timeout=20,  # How many seconds to wait for the
    # clients to complete.
    coordinator_args='',  # Additional arguments for the
    # coordinator.
    master_args='',  # Additional arguments for each server
    # that runs a master.
    backup_args='',  # Additional arguments for each server
    # that runs a backup.
    log_level='NOTICE',  # Log level to use for all servers.
    log_dir='logs',  # Top-level directory in which to write
    # log files.  A separate subdirectory
    # will be created in this directory
    # for the log files from this run.
    client='echo',  # Command line to invoke for each client;
    # configuration arguments such as -C
    # will be prepended to it.
    num_clients=1,  # Number of client processes to run.
    # They will all run on separate
    # machines, if possible, but if there
    # aren't enough available machines then
    # multiple clients will run on some
    # machines.
    share_hosts=False,  # True means clients can be run on
    # machines running servers, if needed.
    transport='infrc',  # Name of transport to use for servers.
    verbose=False,  # Print information about progress in
    # starting clients and servers.
    debug=False,  # If True, pause after starting all
    # servers to allow for debugging setup,
    # such as attaching gdb.
    old_master_host=None,  # Pass a (hostname, ip, id) tuple to
    # construct a large master on that host
    # before the others are started.  Useful
    # for creating the old master for
    # recoveries.
    old_master_args=""  # Additional arguments to run on the
    # old master (e.g. total RAM).
):
    """
    Start a coordinator and servers, as indicated by the arguments.
    Then start one or more client processes and wait for them to complete.
    @return: string indicating the path to the log files for this run.
    """

    if num_servers > len(hosts):
        raise Exception("num_servers (%d) exceeds the available hosts (%d)" %
                        (num_servers, len(hosts)))

    # Create a subdirectory of the log directory for this run
    log_subdir = log.createDir(log_dir)

    coordinator = None
    servers = []
    clients = []
    with Sandbox() as sandbox:

        def ensure_servers(numMasters, numBackups):
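            # Verify with the coordinator (via the ensureServers binary)
            # that numMasters masters and numBackups backups have enlisted,
            # preferring errors from crashed processes over a timeout.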
            sandbox.checkFailures()
            try:
                sandbox.rsh(
                    hosts[0][0], '%s -C %s -m %d -b %d -l 1 --wait 5 '
                    '--logFile %s/ensureServers.log' %
                    (ensure_servers_bin, coordinator_locator, numMasters,
                     numBackups, log_subdir))
            except:
                # prefer exceptions from dead processes to timeout error
                sandbox.checkFailures()
                raise

        # Start coordinator
        if num_servers > 0:
            coordinator_host = hosts[0]
            coordinator_locator = coord_locator(transport, coordinator_host)
            coordinator = sandbox.rsh(
                coordinator_host[0],
                ('%s -C %s -l %s --logFile %s/coordinator.%s.log %s' %
                 (coordinator_binary, coordinator_locator, log_level,
                  log_subdir, coordinator_host[0], coordinator_args)),
                bg=True,
                stderr=subprocess.STDOUT)
            ensure_servers(0, 0)
            if verbose:
                print("Coordinator started on %s at %s" %
                      (coordinator_host[0], coordinator_locator))

        # Track how many services are registered with the coordinator
        # for ensure_servers
        masters_started = 0
        backups_started = 0

        # Start old master - a specialized master for recovery with lots of data
        if old_master_host:
            host = old_master_host
            command = ('%s -C %s -L %s -M -r %d -l %s '
                       '--logFile %s/oldMaster.%s.log %s' %
                       (server_binary, coordinator_locator,
                        server_locator(transport, host), replicas, log_level,
                        log_subdir, host[0], old_master_args))
            servers.append(
                sandbox.rsh(host[0],
                            command,
                            ignoreFailures=True,
                            bg=True,
                            stderr=subprocess.STDOUT))
            masters_started += 1
            ensure_servers(masters_started, 0)

        # Start servers
        for i in range(num_servers):
            # First start the main server on this host, which runs a master
            # and possibly a backup.  The first server shares the same machine
            # as the coordinator.
            host = hosts[i]
            command = ('%s -C %s -L %s -r %d -l %s '
                       '--logFile %s/server.%s.log %s' %
                       (server_binary, coordinator_locator,
                        server_locator(transport, host), replicas, log_level,
                        log_subdir, host[0], master_args))
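            # With backups enabled, append the disk1 backup arguments;
            # otherwise pass -M so this server runs a master only.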
            if backups_per_server > 0:
                command += ' %s %s' % (disk1, backup_args)
                masters_started += 1
                backups_started += 1
            else:
                command += ' -M'
                masters_started += 1
            servers.append(
                sandbox.rsh(host[0],
                            command,
                            bg=True,
                            stderr=subprocess.STDOUT))
            if verbose:
                print("Server started on %s at %s" %
                      (host[0], server_locator(transport, host)))

            # Start an extra backup server on this host, if needed.
            if backups_per_server == 2:
                command = (
                    '%s -C %s -L %s -B %s -l %s '
                    '--logFile %s/backup.%s.log %s' %
                    (server_binary, coordinator_locator,
                     server_locator(transport, host, second_backup_port),
                     disk2, log_level, log_subdir, host[0], backup_args))
                servers.append(
                    sandbox.rsh(host[0],
                                command,
                                bg=True,
                                stderr=subprocess.STDOUT))
                backups_started += 1
                if verbose:
                    print("Extra backup started on %s at %s" %
                          (host[0],
                           server_locator(transport, host, second_backup_port)))
        if debug:
            print("Servers started; pausing for debug setup.")
            input("Type <Enter> to continue: ")  # raw_input on Python 2
        if masters_started > 0 or backups_started > 0:
            ensure_servers(masters_started, backups_started)
            if verbose:
                print("All servers running")

        # Start clients
        args = client.split(" ")
        client_bin = args[0]
        client_args = " ".join(args[1:])
        host_index = num_servers
        for i in range(num_clients):
            if host_index >= len(hosts):
                if share_hosts or num_servers >= len(hosts):
                    host_index = 0
                else:
                    host_index = num_servers
            client_host = hosts[host_index]
            command = ('%s -C %s --numClients %d --clientIndex %d '
                       '--logFile %s/client%d.%s.log %s' %
                       (client_bin, coordinator_locator, num_clients, i,
                        log_subdir, i, client_host[0], client_args))
            clients.append(sandbox.rsh(client_host[0], command, bg=True))
            if verbose:
                print("Client %d started on %s: %s" %
                      (i, client_host[0], command))
            host_index += 1

        # Wait for all of the clients to complete
        start = time.time()
        for i in range(num_clients):
            while clients[i].returncode is None:
                sandbox.checkFailures()
                time.sleep(.1)
                if time.time() - start > timeout:
                    raise Exception('timeout exceeded')
            if verbose:
                print("Client %d finished" % i)

        return log_subdir
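For illustration, a hedged invocation sketch follows. It assumes the surrounding script defines hosts, the coordinator/server binaries, and the default disks; the client command shown is a placeholder, not a real binary name.

if __name__ == '__main__':
    # Sketch only: 'clusterperf' is a placeholder client command line.
    log_path = run(num_servers=2,
                   backups_per_server=1,
                   num_clients=2,
                   client='clusterperf',
                   timeout=60,
                   verbose=True)
    print("Logs written to %s" % log_path)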
Example #3
    def __init__(self,
                 log_dir='logs',
                 log_exists=False,
                 cluster_name_exists=False):
        """
        @param log_dir: Top-level directory in which to write log files.
                        A separate subdirectory will be created in this
                        directory for the log files from this run. This can
                        only be overridden by passing it to __init__() since
                        that method creates the subdirectory.
                        (default: logs)
        @param log_exists:
                        Indicates whether the log directory already exists.
                        This will be true for cluster objects that are
                        created after starting the clusterperf test.
                        (default: False)
        @param cluster_name_exists:
                        Indicates whether a cluster name already exists as
                        part of this test. Backups that are started or
                        restarted using the same cluster name will read
                        data from the replicas.
                        (default: False)
        """
        self.log_level = 'NOTICE'
        self.verbose = False
        self.transport = 'basic+infud'
        self.replicas = 3
        self.disk = default_disk1
        self.disjunct = False

        if cluster_name_exists:  # do nothing if it exists
            self.cluster_name = None
            if self.verbose:
                print('Cluster name exists')
        else:
            # Random 20-letter name; ord('z') + 1 makes the range inclusive.
            self.cluster_name = 'cluster_' + ''.join([
                chr(random.choice(range(ord('a'), ord('z') + 1)))
                for c in range(20)
            ])
        if self.verbose:
            print('Cluster name is %s' % (self.cluster_name))

        self.coordinator = None
        self.next_server_id = 1
        self.next_client_id = 1
        self.masters_started = 0
        self.backups_started = 0
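        # Prefer a host list supplied through config.hooks, if present,
        # when picking the coordinator's machine.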
        if config.hooks.other_hosts:
            self.coordinator_host = config.hooks.other_hosts[0]
        else:
            self.coordinator_host = getHosts()[0]
        self.coordinator_locator = coord_locator(self.transport,
                                                 self.coordinator_host)
        self.log_subdir = log.createDir(log_dir, log_exists)

        # Create a perfcounters directory under the log directory.
        os.mkdir(self.log_subdir + '/perfcounters')
        if not log_exists:
            self.sandbox = Sandbox()
        else:
            self.sandbox = Sandbox(cleanup=False)
        # create the shm directory to store shared files
        try:
            os.mkdir('%s/logs/shm' % os.getcwd())
        except OSError:
            pass  # directory already exists
        f = open('%s/logs/shm/README' % os.getcwd(), 'w+')
        f.write('This directory contains files that correspond to '
                'different server processes that were started during '
                'the last run of clusterperf. Filename is\n'
                '"<hostname>_<pid>". Each of these files stores '
                'the service locator of the respective server, which is '
                'used to give information to the client.\nThe existence '
                'of this file at the end of a clusterperf run means '
                'that processes were not cleaned up properly the last '
                'time, so these pids can be used during manual cleanup.')
        f.close()
        if not cluster_name_exists:
            # Store the name of the cluster by creating an empty file with
            # the appropriate file name in shm, so that new backups created
            # using a different cluster object can use it to read data from
            # their disks.
            open('%s/logs/shm/%s' % (os.getcwd(), self.cluster_name),
                 'w+').close()
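A short sketch of how a later cluster object might recover the stored name from logs/shm; recover_cluster_name is a hypothetical helper, with only the 'cluster_' filename convention taken from the code above.

import os

def recover_cluster_name(shm_dir='logs/shm'):
    # Hypothetical helper: look for the marker file written above.
    for name in os.listdir(shm_dir):
        if name.startswith('cluster_'):
            return name
    return None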
Example #4
    def __init__(self, log_dir='logs', log_exists=False,
                 cluster_name_exists=False):
        """
        @param log_dir: Top-level directory in which to write log files.
                        A separate subdirectory will be created in this
                        directory for the log files from this run. This can
                        only be overridden by passing it to __init__() since
                        that method creates the subdirectory.
                        (default: logs)
        @param log_exists:
                        Indicates whether the log directory already exists.
                        This will be true for cluster objects that are
                        created after starting the clusterperf test.
                        (default: False)
        @param cluster_name_exists:
                        Indicates whether a cluster name already exists as
                        part of this test. Backups that are started or
                        restarted using the same cluster name will read
                        data from the replicas.
                        (default: False)
        """
        self.log_level = 'NOTICE'
        self.verbose = False
        self.transport = 'infrc'
        self.replicas = 3
        self.disk = default_disk1
        self.disjunct = False

        if cluster_name_exists:  # do nothing if it exists
            self.cluster_name = None
            if self.verbose:
                print('Cluster name exists')
        else:
            # Random 20-letter name; ord('z') + 1 makes the range inclusive.
            self.cluster_name = 'cluster_' + ''.join([
                chr(random.choice(range(ord('a'), ord('z') + 1)))
                for c in range(20)
            ])
        if self.verbose:
            print('Cluster name is %s' % (self.cluster_name))

        self.coordinator = None
        self.next_server_id = 1
        self.next_client_id = 1
        self.masters_started = 0
        self.backups_started = 0

        self.coordinator_host = hosts[0]
        self.coordinator_locator = coord_locator(self.transport,
                                                 self.coordinator_host)
        self.log_subdir = log.createDir(log_dir, log_exists)

        # Create a perfcounters directory under the log directory.
        os.mkdir(self.log_subdir + '/perfcounters')
        if not log_exists:
            self.sandbox = Sandbox()
        else:
            self.sandbox = Sandbox(cleanup=False)
        # create the shm directory to store shared files
        try:
            os.mkdir('%s/logs/shm' % os.getcwd())
        except OSError:
            pass  # directory already exists
        f = open('%s/logs/shm/README' % os.getcwd(), 'w+')
        f.write('This directory contains files that correspond to '
                'different server processes that were started during '
                'the last run of clusterperf. Filename is\n'
                '"<hostname>_<pid>". Each of these files stores '
                'the service locator of the respective server, which is '
                'used to give information to the client.\nThe existence '
                'of this file at the end of a clusterperf run means '
                'that processes were not cleaned up properly the last '
                'time, so these pids can be used during manual cleanup.')
        f.close()
        if not cluster_name_exists:
            # Store the name of the cluster by creating an empty file with
            # the appropriate file name in shm, so that new backups created
            # using a different cluster object can use it to read data from
            # their disks.
            open('%s/logs/shm/%s' % (os.getcwd(), self.cluster_name),
                 'w+').close()
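For reference, a minimal standard-library equivalent of the name generator above; make_cluster_name is a hypothetical helper (string.ascii_lowercase spans 'a' through 'z').

import random
import string

def make_cluster_name(length=20):
    # Draw uniformly from 'a'..'z' and prefix with 'cluster_'.
    return 'cluster_' + ''.join(
        random.choice(string.ascii_lowercase) for _ in range(length))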