Example #1
def bootstrap_cluster(cfg):
    config = copy.copy(pristine_config)
    config.update(cfg)

    # Flamegraph Setup
    if flamegraph.is_enabled():
        execute(flamegraph.setup)

    git_id = bootstrap(config, destroy=True)
    return git_id
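
A minimal usage sketch for this helper, assuming `pristine_config` and `bootstrap` come from the surrounding cstar_perf fabric module; the job dictionary below is illustrative, not a real configuration:

# Hypothetical call; the config keys shown here are assumptions:
job_cfg = {'revision': 'apache/trunk', 'hosts': ['node1', 'node2']}
git_id = bootstrap_cluster(job_cfg)
print('Bootstrapped cluster at git id: {}'.format(git_id))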
Example #2
def bootstrap_cluster(cfg):
    config = copy.copy(pristine_config)
    config.update(cfg)

    # Flamegraph Setup
    if flamegraph.is_enabled():
        execute(flamegraph.setup)

    git_id = bootstrap(config, destroy=True)
    return git_id
Example #3
def stress_compare(revisions,
                   title,
                   log,
                   operations = [],
                   subtitle = '',
                   capture_fincore=False,
                   initial_destroy=True,
                   leave_data=False,
                   keep_page_cache=False
               ):
    """
    Run Stress on multiple C* branches and compare them.

    revisions - List of dictionaries that contain cluster configurations
                to trial. This is combined with the default config.
    title - The title of the comparison
    subtitle - A subtitle for more information (displayed smaller underneath)
    log - The json file path to record stats to
    operations - List of dictionaries indicating the operations. Example:
       [# cassandra-stress command, node defaults to cluster defined 'stress_node'
        {'type': 'stress',
         'command': 'write n=19M -rate threads=50',
         'node': 'node1',
         'wait_for_compaction': True},
        # nodetool command to run in parallel on nodes:
        {'type': 'nodetool',
         'command': 'decommission',
         'nodes': ['node1','node2']},
        # cqlsh script, node defaults to cluster defined 'stress_node'
        {'type': 'cqlsh',
         'script': "use my_ks; INSERT INTO blah (col1, col2) VALUES (val1, val2);",
         'node': 'node1'}
       ]
    capture_fincore - Enables capturing of linux-fincore logs of C* data files.
    initial_destroy - Destroy all data before the first revision is run.
    leave_data - Whether to leave the Cassandra data/commitlog/etc directories intact between revisions.
    keep_page_cache - Whether to leave the linux page cache intact between revisions.
    """
    validate_revisions_list(revisions)
    validate_operations_list(operations)

    pristine_config = copy.copy(fab_config)

    # initial_destroy setting can be set in the job
    # configuration, or manually in the call to this function.
    # Either is fine, but they shouldn't conflict. If they do,
    # ValueError is raised.
    if initial_destroy == True and pristine_config.get('initial_destroy', None) == False:
        raise ValueError('setting for initial_destroy conflicts in job config and stress_compare() call')
    else:
        initial_destroy = bool(distutils.util.strtobool(str(pristine_config.get('initial_destroy', initial_destroy))))

    if initial_destroy:
        logger.info("Cleaning up from prior runs of stress_compare ...")
        teardown(destroy=True, leave_data=False)

    # Update our local cassandra git remotes and branches
    _, localhost_entry = get_localhost()
    with common.fab.settings(hosts=[localhost_entry]):
        execute(cstar.update_cassandra_git)

    # Flamegraph Setup
    if flamegraph.is_enabled():
        execute(flamegraph.setup)

    clean_stress()
    stress_revisions = {operation['stress_revision'] for operation in operations if 'stress_revision' in operation}
    stress_shas = setup_stress(stress_revisions)

    with GracefulTerminationHandler() as handler:
        for rev_num, revision_config in enumerate(revisions):
            config = copy.copy(pristine_config)
            config.update(revision_config)
            revision = revision_config['revision']
            config['log'] = log
            config['title'] = title
            config['subtitle'] = subtitle
            product = dse if config.get('product') == 'dse' else cstar

            # leave_data setting can be set in the revision
            # configuration, or manually in the call to this function.
            # Either is fine, but they shouldn't conflict. If they do,
            # ValueError is raised.
            if leave_data == True and revision_config.get('leave_data', None) == False:
                raise ValueError('setting for leave_data conflicts in job config and stress_compare() call')
            else:
                leave_data = bool(distutils.util.strtobool(str(revision_config.get('leave_data', leave_data))))

            logger.info("Bringing up {revision} cluster...".format(revision=revision))

            # Drop the page cache between each revision, especially
            # important when leave_data=True :
            if not keep_page_cache:
                drop_page_cache()

            # Only fetch from git on the first run:
            git_fetch = rev_num == 0
            revision_config['git_id'] = git_id = bootstrap(config,
                                                           destroy=initial_destroy,
                                                           leave_data=leave_data,
                                                           git_fetch=git_fetch)

            if flamegraph.is_enabled(revision_config):
                execute(flamegraph.ensure_stopped_perf_agent)
                execute(flamegraph.start_perf_agent, rev_num)

            if capture_fincore:
                start_fincore_capture(interval=10)

            last_stress_operation_id = 'None'
            for operation_i, operation in enumerate(operations, 1):
                try:
                    start = datetime.datetime.now()
                    stats = {
                        "id": str(uuid.uuid1()),
                        "type": operation['type'],
                        "revision": revision,
                        "git_id": git_id,
                        "start_date": start.isoformat(),
                        "label": revision_config.get('label', revision_config['revision']),
                        "test": '{operation_i}_{operation}'.format(
                            operation_i=operation_i,
                            operation=operation['type'])
                    }

                    if operation['type'] == 'stress':
                        last_stress_operation_id = stats['id']
                        # Default to all the nodes of the cluster if no
                        # nodes were specified in the command:
                        if 'nodes' in operation:
                            cmd = "{command} -node {hosts}".format(
                                command=operation['command'],
                                hosts=",".join(operation['nodes']))
                        elif '-node' in operation['command']:
                            cmd = operation['command']
                        else:
                            cmd = "{command} -node {hosts}".format(
                                command=operation['command'],
                                hosts=",".join([n for n in fab_config['hosts']]))
                        stats['command'] = cmd
                        stats['intervals'] = []
                        stats['test'] = '{operation_i}_{operation}'.format(
                            operation_i=operation_i, operation=cmd.strip().split(' ')[0]).replace(" ", "_")
                        logger.info('Running stress operation : {cmd}  ...'.format(cmd=cmd))
                        # Run stress:
                        # (stress takes the stats as a parameter, and adds
                        #  more as it runs):
                        stress_sha = stress_shas[operation.get('stress_revision', 'default')]
                        stats = stress(cmd, revision, stress_sha, stats=stats)
                        # Wait for all compactions to finish (unless disabled):
                        if operation.get('wait_for_compaction', True):
                            compaction_throughput = revision_config.get("compaction_throughput_mb_per_sec", 16)
                            wait_for_compaction(compaction_throughput=compaction_throughput)

                    elif operation['type'] == 'nodetool':
                        if 'nodes' not in operation:
                            operation['nodes'] = 'all'
                        if operation['nodes'] in ['all','ALL']:
                            nodes = [n for n in fab_config['hosts']]
                        else:
                            nodes = operation['nodes']

                        set_nodetool_path(os.path.join(product.get_bin_path(), 'nodetool'))
                        logger.info("Running nodetool on {nodes} with command: {command}".format(nodes=operation['nodes'], command=operation['command']))
                        stats['command'] = operation['command']
                        output = nodetool_multi(nodes, operation['command'])
                        stats['output'] = output
                        logger.info("Nodetool command finished on all nodes")

                    elif operation['type'] == 'cqlsh':
                        logger.info("Running cqlsh commands on {node}".format(node=operation['node']))
                        set_cqlsh_path(os.path.join(product.get_bin_path(), 'cqlsh'))
                        output = cqlsh(operation['script'], operation['node'])
                        stats['output'] = output.split("\n")
                        stats['command'] = operation['script']
                        logger.info("Cqlsh commands finished")

                    elif operation['type'] == 'bash':
                        nodes = operation.get('nodes', [n for n in fab_config['hosts']])
                        logger.info("Running bash commands on: {nodes}".format(nodes=nodes))
                        stats['output'] = bash(operation['script'], nodes)
                        stats['command'] = operation['script']
                        logger.info("Bash commands finished")

                    elif operation['type'] == 'spark_cassandra_stress':
                        node = operation['node']
                        logger.info("Running spark_cassandra_stress on {node}".format(node=node))
                        output = spark_cassandra_stress(operation['script'], node)
                        stats['output'] = output
                        logger.info("spark_cassandra_stress finished")

                    elif operation['type'] == 'ctool':
                        logger.info("Running ctool with parameters: {command}".format(command=operation['command']))
                        ctool = Ctool(operation['command'], common.config)
                        output = execute(ctool.run)
                        stats['output'] = output
                        logger.info("ctool finished")

                    elif operation['type'] == 'dsetool':
                        if 'nodes' not in operation:
                            operation['nodes'] = 'all'
                        if operation['nodes'] in ['all','ALL']:
                            nodes = [n for n in fab_config['hosts']]
                        else:
                            nodes = operation['nodes']

                        dsetool_options = operation['script']
                        logger.info("Running dsetool {command} on {nodes}".format(nodes=operation['nodes'], command=dsetool_options))
                        stats['command'] = dsetool_options
                        output = dsetool_cmd(nodes=nodes, options=dsetool_options)
                        stats['output'] = output
                        logger.info("dsetool command finished on all nodes")

                    elif operation['type'] == 'dse':
                        logger.info("Running dse command on {node}".format(node=operation['node']))
                        output = dse_cmd(node=operation['node'], options=operation['script'])
                        stats['output'] = output.split("\n")
                        stats['command'] = operation['script']
                        logger.info("dse commands finished")

                    end = datetime.datetime.now()
                    stats['end_date'] = end.isoformat()
                    stats['op_duration'] = str(end - start)
                    log_stats(stats, file=log)
                finally:
                    # Copy node logs:
                    retrieve_logs_and_create_tarball(job_id=stats['id'])
                    revision_config['last_log'] = stats['id']

                if capture_fincore:
                    stop_fincore_capture()
                    log_dir = os.path.join(CSTAR_PERF_LOGS_DIR, stats['id'])
                    retrieve_fincore_logs(log_dir)
                    # Restart fincore capture if this is not the last
                    # operation:
                    if operation_i < len(operations):
                        start_fincore_capture(interval=10)

            if flamegraph.is_enabled(revision_config):
                # Generate and Copy node flamegraphs
                execute(flamegraph.stop_perf_agent)
                execute(flamegraph.generate_flamegraph, rev_num)
                flamegraph_dir = os.path.join(os.path.expanduser('~'),'.cstar_perf', 'flamegraph')
                flamegraph_test_dir = os.path.join(flamegraph_dir, last_stress_operation_id)
                retrieve_flamegraph(flamegraph_test_dir, rev_num+1)
                sh.tar('cfvz', "{}.tar.gz".format(stats['id']), last_stress_operation_id, _cwd=flamegraph_dir)
                shutil.rmtree(flamegraph_test_dir)

            log_add_data(log, {'title':title,
                               'subtitle': subtitle,
                               'revisions': revisions})

            if revisions[-1].get('leave_data', leave_data):
                teardown(destroy=False, leave_data=True)
            else:
                kill_delay = 300 if profiler.yourkit_is_enabled(revision_config) else 0
                teardown(destroy=True, leave_data=False, kill_delay=kill_delay)

            if profiler.yourkit_is_enabled(revision_config):
                yourkit_config = profiler.yourkit_get_config()
                yourkit_dir = os.path.join(os.path.expanduser('~'),'.cstar_perf', 'yourkit')
                yourkit_test_dir = os.path.join(yourkit_dir, last_stress_operation_id)
                retrieve_yourkit(yourkit_test_dir, rev_num+1)
                sh.tar('cfvz', "{}.tar.gz".format(stats['id']),
                       last_stress_operation_id, _cwd=yourkit_dir)
                shutil.rmtree(yourkit_test_dir)
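
Based on the docstring above, here is a hedged sketch of how this version of stress_compare might be invoked; the revision names, node name, title, and log path are all illustrative assumptions:

revisions = [
    {'revision': 'apache/cassandra-3.0', 'label': 'baseline'},
    {'revision': 'apache/trunk', 'label': 'candidate'},
]
operations = [
    {'type': 'stress',
     'command': 'write n=19M -rate threads=50',
     'wait_for_compaction': True},
    {'type': 'nodetool',
     'command': 'decommission',
     'nodes': ['node1']},
]
stress_compare(revisions,
               title='3.0 vs trunk write path',
               log='stress_compare_stats.json',  # illustrative path
               operations=operations)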
Example #4
def stress_compare(revisions,
                   title,
                   log,
                   operations=[],
                   subtitle='',
                   capture_fincore=False,
                   initial_destroy=True,
                   leave_data=False,
                   keep_page_cache=False):
    """
    Run Stress on multiple C* branches and compare them.

    revisions - List of dictionaries that contain cluster configurations
                to trial. This is combined with the default config.
    title - The title of the comparison
    subtitle - A subtitle for more information (displayed smaller underneath)
    log - The json file path to record stats to
    operations - List of dictionaries indicating the operations. Example:
       [# cassandra-stress command, node defaults to cluster defined 'stress_node'
        {'type': 'stress',
         'command': 'write n=19M -rate threads=50',
         'node': 'node1',
         'wait_for_compaction': True},
        # nodetool command to run in parallel on nodes:
        {'type': 'nodetool',
         'command': 'decommission',
         'nodes': ['node1','node2']},
        # cqlsh script, node defaults to cluster defined 'stress_node'
        {'type': 'cqlsh',
         'script': "use my_ks; INSERT INTO blah (col1, col2) VALUES (val1, val2);",
         'node': 'node1'}
       ]
    capture_fincore - Enables capturing of linux-fincore logs of C* data files.
    initial_destroy - Destroy all data before the first revision is run.
    leave_data - Whether to leave the Cassandra data/commitlog/etc directories intact between revisions.
    keep_page_cache - Whether to leave the linux page cache intact between revisions.
    """
    validate_revisions_list(revisions)
    validate_operations_list(operations)

    pristine_config = copy.copy(fab_config)

    # initial_destroy setting can be set in the job
    # configuration, or manually in the call to this function. Either
    # is fine, but they shouldn't conflict. If they do, ValueError is
    # raised.
    if initial_destroy == True and pristine_config.get('initial_destroy',
                                                       None) == False:
        raise ValueError(
            'setting for initial_destroy conflicts in job config and stress_compare() call'
        )
    else:
        initial_destroy = pristine_config.get('initial_destroy',
                                              initial_destroy)

    if initial_destroy:
        logger.info("Cleaning up from prior runs of stress_compare ...")
        teardown(destroy=True, leave_data=False)

    # Update our local cassandra git remotes and branches
    _, localhost_entry = get_localhost()
    with common.fab.settings(hosts=[localhost_entry]):
        execute(cstar.update_cassandra_git)

    clean_stress()
    stress_revisions = {
        operation['stress_revision'] for operation in operations
        if 'stress_revision' in operation
    }
    stress_shas = setup_stress(stress_revisions)

    for rev_num, revision_config in enumerate(revisions):
        config = copy.copy(pristine_config)
        config.update(revision_config)
        revision = revision_config['revision']
        config['log'] = log
        config['title'] = title
        config['subtitle'] = subtitle
        product = dse if config['product'] == 'dse' else cstar

        # leave_data setting can be set in the revision
        # configuration, or manually in the call to this function.
        # Either is fine, but they shouldn't conflict. If they do,
        # ValueError is raised.
        if leave_data == True and revision_config.get('leave_data',
                                                      None) == False:
            raise ValueError(
                'setting for leave_data conflicts in job config and stress_compare() call'
            )
        else:
            leave_data = revision_config.get('leave_data', leave_data)

        logger.info(
            "Bringing up {revision} cluster...".format(revision=revision))

        # Drop the page cache between each revision, especially
        # important when leave_data=True :
        if not keep_page_cache:
            drop_page_cache()

        # Only fetch from git on the first run:
        git_fetch = rev_num == 0
        revision_config['git_id'] = git_id = bootstrap(config,
                                                       destroy=True,
                                                       leave_data=leave_data,
                                                       git_fetch=git_fetch)

        if capture_fincore:
            start_fincore_capture(interval=10)

        for operation_i, operation in enumerate(operations, 1):
            start = datetime.datetime.now()
            stats = {
                "id": str(uuid.uuid1()),
                "type": operation['type'],
                "revision": revision,
                "git_id": git_id,
                "start_date": start.isoformat(),
                "label": revision_config.get('label',
                                             revision_config['revision'])
            }

            if operation['type'] == 'stress':
                # Default to all the nodes of the cluster if no
                # nodes were specified in the command:
                if 'nodes' in operation:
                    cmd = "{command} -node {hosts}".format(
                        command=operation['command'],
                        hosts=",".join(operation['nodes']))
                elif '-node' in operation['command']:
                    cmd = operation['command']
                else:
                    cmd = "{command} -node {hosts}".format(
                        command=operation['command'],
                        hosts=",".join([n for n in fab_config['hosts']]))
                stats['command'] = cmd
                stats['intervals'] = []
                stats['test'] = '{operation_i}_{operation}'.format(
                    operation_i=operation_i,
                    operation=cmd.strip().split(' ')[0]).replace(" ", "_")
                logger.info(
                    'Running stress operation : {cmd}  ...'.format(cmd=cmd))
                # Run stress:
                # (stress takes the stats as a parameter, and adds
                #  more as it runs):
                stress_sha = stress_shas[operation.get('stress_revision',
                                                       'default')]
                stats = stress(cmd, revision, stress_sha, stats=stats)
                # Wait for all compactions to finish (unless disabled):
                if operation.get('wait_for_compaction', True):
                    compaction_throughput = revision_config.get(
                        "compaction_throughput_mb_per_sec", 16)
                    wait_for_compaction(
                        compaction_throughput=compaction_throughput)

            elif operation['type'] == 'nodetool':
                if 'nodes' not in operation:
                    operation['nodes'] = 'all'
                if operation['nodes'] in ['all', 'ALL']:
                    nodes = [n for n in fab_config['hosts']]
                else:
                    nodes = operation['nodes']

                set_nodetool_path(
                    os.path.join(product.get_bin_path(), 'nodetool'))
                logger.info(
                    "Running nodetool on {nodes} with command: {command}".
                    format(nodes=operation['nodes'],
                           command=operation['command']))
                stats['command'] = operation['command']
                output = nodetool_multi(nodes, operation['command'])
                stats['output'] = output
                logger.info("Nodetool command finished on all nodes")

            elif operation['type'] == 'cqlsh':
                logger.info("Running cqlsh commands on {node}".format(
                    node=operation['node']))
                set_cqlsh_path(os.path.join(product.get_bin_path(), 'cqlsh'))
                output = cqlsh(operation['script'], operation['node'])
                stats['output'] = output.split("\n")
                logger.info("Cqlsh commands finished")

            elif operation['type'] == 'bash':
                nodes = operation.get('nodes',
                                      [n for n in fab_config['hosts']])
                logger.info(
                    "Running bash commands on: {nodes}".format(nodes=nodes))
                stats['output'] = bash(operation['script'], nodes)
                logger.info("Bash commands finished")

            end = datetime.datetime.now()
            stats['end_date'] = end.isoformat()
            stats['op_duration'] = str(end - start)
            log_stats(stats, file=log)

            #Copy node logs:
            logs_dir = os.path.join(os.path.expanduser('~'), '.cstar_perf',
                                    'logs')
            log_dir = os.path.join(logs_dir, stats['id'])
            os.makedirs(log_dir)
            retrieve_logs(log_dir)
            revision_config['last_log'] = stats['id']
            #Tar them for archiving:
            subprocess.Popen(shlex.split(
                'tar cfvz {id}.tar.gz {id}'.format(id=stats['id'])),
                             cwd=logs_dir).communicate()
            shutil.rmtree(log_dir)

            if capture_fincore:
                stop_fincore_capture()
                retrieve_fincore_logs(log_dir)
                # Restart fincore capture if this is not the last
                # operation:
                if operation_i < len(operations):
                    start_fincore_capture(interval=10)

        log_add_data(log, {
            'title': title,
            'subtitle': subtitle,
            'revisions': revisions
        })

        if revisions[-1].get('leave_data', leave_data):
            teardown(destroy=False, leave_data=True)
        else:
            teardown(destroy=True, leave_data=False)
Example #5
def stress_compare(revisions,
                   title,
                   log,
                   operations = [],
                   subtitle = '',
                   capture_fincore=False,
                   initial_destroy=True,
                   leave_data=False,
                   keep_page_cache=False,
                   git_fetch_before_test=True,
                   bootstrap_before_test=True,
                   teardown_after_test=True
               ):
    """
    Run Stress on multiple C* branches and compare them.

    revisions - List of dictionaries that contain cluster configurations
                to trial. This is combined with the default config.
    title - The title of the comparison
    subtitle - A subtitle for more information (displayed smaller underneath)
    log - The json file path to record stats to
    operations - List of dictionaries indicating the operations. Example:
       [# cassandra-stress command, node defaults to cluster defined 'stress_node'
        {'type': 'stress',
         'command': 'write n=19M -rate threads=50',
         'node': 'node1',
         'wait_for_compaction': True},
        # nodetool command to run in parallel on nodes:
        {'type': 'nodetool',
         'command': 'decommission',
         'nodes': ['node1','node2']},
        # cqlsh script, node defaults to cluster defined 'stress_node'
        {'type': 'cqlsh',
         'script': "use my_ks; INSERT INTO blah (col1, col2) VALUES (val1, val2);",
         'node': 'node1'}
       ]
    capture_fincore - Enables capturing of linux-fincore logs of C* data files.
    initial_destroy - Destroy all data before the first revision is run.
    leave_data - Whether to leave the Cassandra data/commitlog/etc directories intact between revisions.
    keep_page_cache - Whether to leave the linux page cache intact between revisions.
    git_fetch_before_test (bool): If True, will update the cassandra.git with fab_common.git_repos
    bootstrap_before_test (bool): If True, will bootstrap DSE / C* before running the operations
    teardown_after_test (bool): If True, will shutdown DSE / C* after all of the operations
    """
    validate_revisions_list(revisions)
    validate_operations_list(operations)

    pristine_config = copy.copy(fab_config)

    # initial_destroy and git_fetch_before_test can be set in the job configuration,
    # or manually in the call to this function.
    # Either is fine, but they shouldn't conflict. If they do, a ValueError is raised.
    initial_destroy = get_bool_if_method_and_config_values_do_not_conflict('initial_destroy',
                                                                           initial_destroy,
                                                                           pristine_config,
                                                                           method_name='stress_compare')

    if initial_destroy:
        logger.info("Cleaning up from prior runs of stress_compare ...")
        teardown(destroy=True, leave_data=False)

    # https://datastax.jira.com/browse/CSTAR-633
    git_fetch_before_test = get_bool_if_method_and_config_values_do_not_conflict('git_fetch_before_test',
                                                                                 git_fetch_before_test,
                                                                                 pristine_config,
                                                                                 method_name='stress_compare')

    stress_shas = maybe_update_cassandra_git_and_setup_stress(operations, git_fetch=git_fetch_before_test)

    # Flamegraph Setup
    if flamegraph.is_enabled():
        execute(flamegraph.setup)

    with GracefulTerminationHandler() as handler:
        for rev_num, revision_config in enumerate(revisions):
            config = copy.copy(pristine_config)
            config.update(revision_config)
            revision = revision_config['revision']
            config['log'] = log
            config['title'] = title
            config['subtitle'] = subtitle
            product = dse if config.get('product') == 'dse' else cstar

            # leave_data, bootstrap_before_test, and teardown_after_test can be set in the job configuration,
            # or manually in the call to this function.
            # Either is fine, but they shouldn't conflict. If they do, a ValueError is raised.
            leave_data = get_bool_if_method_and_config_values_do_not_conflict('leave_data',
                                                                              leave_data,
                                                                              revision_config,
                                                                              method_name='stress_compare')

            # https://datastax.jira.com/browse/CSTAR-638
            bootstrap_before_test = get_bool_if_method_and_config_values_do_not_conflict('bootstrap_before_test',
                                                                                         bootstrap_before_test,
                                                                                         revision_config,
                                                                                         method_name='stress_compare')

            # https://datastax.jira.com/browse/CSTAR-639
            teardown_after_test = get_bool_if_method_and_config_values_do_not_conflict('teardown_after_test',
                                                                                       teardown_after_test,
                                                                                       revision_config,
                                                                                       method_name='stress_compare')

            logger.info("Bringing up {revision} cluster...".format(revision=revision))

            # Drop the page cache between each revision, especially
            # important when leave_data=True :
            if not keep_page_cache:
                drop_page_cache()

            # Only fetch from git on the first run and if git_fetch_before_test is True
            git_fetch_before_bootstrap = rev_num == 0 and git_fetch_before_test
            if bootstrap_before_test:
                revision_config['git_id'] = git_id = bootstrap(config,
                                                               destroy=initial_destroy,
                                                               leave_data=leave_data,
                                                               git_fetch=git_fetch_before_bootstrap)
            else:
                revision_config['git_id'] = git_id = config['revision']

            if flamegraph.is_enabled(revision_config):
                execute(flamegraph.ensure_stopped_perf_agent)
                execute(flamegraph.start_perf_agent, rev_num)

            if capture_fincore:
                start_fincore_capture(interval=10)

            last_stress_operation_id = 'None'
            for operation_i, operation in enumerate(operations, 1):
                try:
                    start = datetime.datetime.now()
                    stats = {
                        "id": str(uuid.uuid1()),
                        "type": operation['type'],
                        "revision": revision,
                        "git_id": git_id,
                        "start_date": start.isoformat(),
                        "label": revision_config.get('label', revision_config['revision']),
                        "test": '{operation_i}_{operation}'.format(
                            operation_i=operation_i,
                            operation=operation['type'])
                    }

                    if operation['type'] == 'stress':
                        last_stress_operation_id = stats['id']
                        # Default to all the nodes of the cluster if no
                        # nodes were specified in the command:
                        if 'nodes' in operation:
                            cmd = "{command} -node {hosts}".format(
                                command=operation['command'],
                                hosts=",".join(operation['nodes']))
                        elif '-node' in operation['command']:
                            cmd = operation['command']
                        else:
                            cmd = "{command} -node {hosts}".format(
                                command=operation['command'],
                                hosts=",".join([n for n in fab_config['hosts']]))
                        stats['command'] = cmd
                        stats['intervals'] = []
                        stats['test'] = '{operation_i}_{operation}'.format(
                            operation_i=operation_i, operation=cmd.strip().split(' ')[0]).replace(" ", "_")
                        logger.info('Running stress operation : {cmd}  ...'.format(cmd=cmd))
                        # Run stress:
                        # (stress takes the stats as a parameter, and adds
                        #  more as it runs):
                        stress_sha = stress_shas[operation.get('stress_revision', 'default')]
                        stats = stress(cmd, revision, stress_sha, stats=stats)
                        # Wait for all compactions to finish (unless disabled):
                        if operation.get('wait_for_compaction', True):
                            compaction_throughput = revision_config.get("compaction_throughput_mb_per_sec", 16)
                            wait_for_compaction(compaction_throughput=compaction_throughput)

                    elif operation['type'] == 'nodetool':
                        if 'nodes' not in operation:
                            operation['nodes'] = 'all'
                        if operation['nodes'] in ['all','ALL']:
                            nodes = [n for n in fab_config['hosts']]
                        else:
                            nodes = operation['nodes']

                        set_nodetool_path(os.path.join(product.get_bin_path(), 'nodetool'))
                        logger.info("Running nodetool on {nodes} with command: {command}".format(nodes=operation['nodes'], command=operation['command']))
                        stats['command'] = operation['command']
                        output = nodetool_multi(nodes, operation['command'])
                        stats['output'] = output
                        logger.info("Nodetool command finished on all nodes")

                    elif operation['type'] == 'cqlsh':
                        logger.info("Running cqlsh commands on {node}".format(node=operation['node']))
                        set_cqlsh_path(os.path.join(product.get_bin_path(), 'cqlsh'))
                        output = cqlsh(operation['script'], operation['node'])
                        stats['output'] = output.split("\n")
                        stats['command'] = operation['script']
                        logger.info("Cqlsh commands finished")

                    elif operation['type'] == 'bash':
                        nodes = operation.get('nodes', [n for n in fab_config['hosts']])
                        logger.info("Running bash commands on: {nodes}".format(nodes=nodes))
                        stats['output'] = bash(operation['script'], nodes)
                        stats['command'] = operation['script']
                        logger.info("Bash commands finished")

                    elif operation['type'] == 'spark_cassandra_stress':
                        nodes = operation.get('nodes', [n for n in fab_config['hosts']])
                        stress_node = config.get('stress_node', None)
                        # Note: once we have https://datastax.jira.com/browse/CSTAR-617, we should fix this to use
                        # client-tool when DSE_VERSION >= 4.8.0
                        # https://datastax.jira.com/browse/DSP-6025: dse client-tool
                        master_regex = re.compile(r"(.|\n)*(?P<master>spark:\/\/\d+.\d+.\d+.\d+:\d+)(.|\n)*")
                        master_out = dsetool_cmd(nodes[0], options='sparkmaster')[nodes[0]]
                        master_match = master_regex.match(master_out)
                        if not master_match:
                            raise ValueError('Could not find master address from "dsetool sparkmaster" cmd\n'
                                             'Found output: {f}'.format(f=master_out))
                        master_string = master_match.group('master')
                        build_spark_cassandra_stress = bool(distutils.util.strtobool(
                            str(operation.get('build_spark_cassandra_stress', 'True'))))
                        remove_existing_spark_data = bool(distutils.util.strtobool(
                            str(operation.get('remove_existing_spark_data', 'True'))))
                        logger.info("Running spark_cassandra_stress on {stress_node} "
                                    "using spark.cassandra.connection.host={node} and "
                                    "spark-master {master}".format(stress_node=stress_node,
                                                                   node=nodes[0],
                                                                   master=master_string))
                        output = spark_cassandra_stress(operation['script'], nodes, stress_node=stress_node,
                                                        master=master_string,
                                                        build_spark_cassandra_stress=build_spark_cassandra_stress,
                                                        remove_existing_spark_data=remove_existing_spark_data)
                        stats['output'] = output.get('output', 'No output captured')
                        stats['spark_cass_stress_time_in_seconds'] = output.get('stats', {}).get('TimeInSeconds', 'No time captured')
                        stats['spark_cass_stress_ops_per_second'] = output.get('stats', {}).get('OpsPerSecond', 'No ops/s captured')
                        logger.info("spark_cassandra_stress finished")

                    elif operation['type'] == 'ctool':
                        logger.info("Running ctool with parameters: {command}".format(command=operation['command']))
                        ctool = Ctool(operation['command'], common.config)
                        output = execute(ctool.run)
                        stats['output'] = output
                        logger.info("ctool finished")

                    elif operation['type'] == 'dsetool':
                        if 'nodes' not in operation:
                            operation['nodes'] = 'all'
                        if operation['nodes'] in ['all','ALL']:
                            nodes = [n for n in fab_config['hosts']]
                        else:
                            nodes = operation['nodes']

                        dsetool_options = operation['script']
                        logger.info("Running dsetool {command} on {nodes}".format(nodes=operation['nodes'], command=dsetool_options))
                        stats['command'] = dsetool_options
                        output = dsetool_cmd(nodes=nodes, options=dsetool_options)
                        stats['output'] = output
                        logger.info("dsetool command finished on all nodes")

                    elif operation['type'] == 'dse':
                        logger.info("Running dse command on {node}".format(node=operation['node']))
                        output = dse_cmd(node=operation['node'], options=operation['script'])
                        stats['output'] = output.split("\n")
                        stats['command'] = operation['script']
                        logger.info("dse commands finished")

                    end = datetime.datetime.now()
                    stats['end_date'] = end.isoformat()
                    stats['op_duration'] = str(end - start)
                    log_stats(stats, file=log)
                finally:
                    # Copy node logs:
                    retrieve_logs_and_create_tarball(job_id=stats['id'])
                    revision_config['last_log'] = stats['id']

                if capture_fincore:
                    stop_fincore_capture()
                    log_dir = os.path.join(CSTAR_PERF_LOGS_DIR, stats['id'])
                    retrieve_fincore_logs(log_dir)
                    # Restart fincore capture if this is not the last
                    # operation:
                    if operation_i < len(operations):
                        start_fincore_capture(interval=10)

            if flamegraph.is_enabled(revision_config):
                # Generate and Copy node flamegraphs
                execute(flamegraph.stop_perf_agent)
                execute(flamegraph.generate_flamegraph, rev_num)
                flamegraph_dir = os.path.join(os.path.expanduser('~'),'.cstar_perf', 'flamegraph')
                flamegraph_test_dir = os.path.join(flamegraph_dir, last_stress_operation_id)
                retrieve_flamegraph(flamegraph_test_dir, rev_num+1)
                sh.tar('cfvz', "{}.tar.gz".format(stats['id']), last_stress_operation_id, _cwd=flamegraph_dir)
                shutil.rmtree(flamegraph_test_dir)

            log_add_data(log, {'title':title,
                               'subtitle': subtitle,
                               'revisions': revisions})
            if teardown_after_test:
                if revisions[-1].get('leave_data', leave_data):
                    teardown(destroy=False, leave_data=True)
                else:
                    kill_delay = 300 if profiler.yourkit_is_enabled(revision_config) else 0
                    teardown(destroy=True, leave_data=False, kill_delay=kill_delay)

            if profiler.yourkit_is_enabled(revision_config):
                yourkit_config = profiler.yourkit_get_config()
                yourkit_dir = os.path.join(os.path.expanduser('~'),'.cstar_perf', 'yourkit')
                yourkit_test_dir = os.path.join(yourkit_dir, last_stress_operation_id)
                retrieve_yourkit(yourkit_test_dir, rev_num+1)
                sh.tar('cfvz', "{}.tar.gz".format(stats['id']),
                       last_stress_operation_id, _cwd=yourkit_dir)
                shutil.rmtree(yourkit_test_dir)
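
Example #5 replaces the inline conflict checks of the earlier examples with `get_bool_if_method_and_config_values_do_not_conflict`. A minimal sketch of that helper, reconstructed from the inline logic in Example #3; the real implementation may differ:

import distutils.util

def get_bool_if_method_and_config_values_do_not_conflict(key, method_value,
                                                         config, method_name):
    # Raise if the caller passed True but the config explicitly says False,
    # otherwise coerce whichever value wins to a bool (as Example #3 does):
    if method_value == True and config.get(key, None) == False:
        raise ValueError('setting for {key} conflicts in job config and '
                         '{method}() call'.format(key=key, method=method_name))
    return bool(distutils.util.strtobool(str(config.get(key, method_value))))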
Example #6
def bootstrap_cluster(cfg):
    config = copy.copy(pristine_config)
    config.update(cfg)

    git_id = bootstrap(config, destroy=True)
    return git_id
Example #7
def stress_compare(revisions, 
                   title,
                   log,
                   operations = [],
                   subtitle = '',
                   capture_fincore=False,
                   initial_destroy=True,
                   leave_data=False,
                   keep_page_cache=False
               ):
    """
    Run Stress on multiple C* branches and compare them.
    
    revisions - List of dictionaries that contain cluster configurations
                to trial. This is combined with the default config.
    title - The title of the comparison
    subtitle - A subtitle for more information (displayed smaller underneath)
    log - The json file path to record stats to
    operations - List of dictionaries indicating the operations. Example:
       [# cassandra-stress command, node defaults to cluster defined 'stress_node'
        {'type': 'stress',
         'command': 'write n=19000000 -rate threads=50',
         'node': 'node1',
         'wait_for_compaction': True},
        # nodetool command to run in parallel on nodes:
        {'type': 'nodetool',
         'command': 'decommission',
         'nodes': ['node1','node2']},
        # cqlsh script, node defaults to cluster defined 'stress_node'
        {'type': 'cqlsh',
         'script': "use my_ks; INSERT INTO blah (col1, col2) VALUES (val1, val2);",
         'node': 'node1'}
       ]
    capture_fincore - Enables capturing of linux-fincore logs of C* data files.
    initial_destroy - Destroy all data before the first revision is run.
    leave_data - Whether to leave the Cassandra data/commitlog/etc directories intact between revisions.
    keep_page_cache - Whether to leave the linux page cache intact between revisions.
    """
    validate_revisions_list(revisions)
    validate_operations_list(operations)

    pristine_config = copy.copy(fab_config)

    # initial_destroy setting can be set in the job
    # configuration, or manually in the call to this function. Either
    # is fine, but they shouldn't conflict. If they do, ValueError is
    # raised.
    if initial_destroy == True and pristine_config.get('initial_destroy', None) == False:
        raise ValueError('setting for initial_destroy conflicts in job config and stress_compare() call')
    else:
        initial_destroy = pristine_config.get('initial_destroy', initial_destroy)
        
    if initial_destroy:
        logger.info("Cleaning up from prior runs of stress_compare ...")
        teardown(destroy=True, leave_data=False)

    # Clean stress builds
    stress_builds = [b for b in os.listdir(CASSANDRA_STRESS_PATH)
                     if b not in ['default', 'trunk']]
    for stress_build in stress_builds:
        path = os.path.join(CASSANDRA_STRESS_PATH, stress_build)
        logger.info("Removing stress build '{}'".format(path))
        shutil.rmtree(path)

    for rev_num, revision_config in enumerate(revisions):
        config = copy.copy(pristine_config)
        config.update(revision_config)
        revision = revision_config['revision']
        config['log'] = log
        config['title'] = title
        config['subtitle'] = subtitle

        # leave_data setting can be set in the revision
        # configuration, or manually in the call to this function.
        # Either is fine, but they shouldn't conflict. If they do,
        # ValueError is raised.
        if leave_data == True and revision_config.get('leave_data', None) == False:
            raise ValueError('setting for leave_data conflicts in job config and stress_compare() call')
        else:
            leave_data = revision_config.get('leave_data', leave_data)
                
        logger.info("Bringing up {revision} cluster...".format(revision=revision))
        
        # Drop the page cache between each revision, especially 
        # important when leave_data=True : 
        if not keep_page_cache:
            drop_page_cache()

        # Only fetch from git on the first run:
        git_fetch = rev_num == 0
        revision_config['git_id'] = git_id = bootstrap(config, destroy=True, leave_data=leave_data, git_fetch=git_fetch)
    
        if capture_fincore:
            start_fincore_capture(interval=10)

        for operation_i, operation in enumerate(operations, 1):
            start = datetime.datetime.now()
            stats = {"id":str(uuid.uuid1()), "type":operation['type'], 
                     "revision": revision, "git_id": git_id, "start_date":start.isoformat(),
                     "label":revision_config.get('label', revision_config['revision'])}

            if operation['type'] == 'stress':
                # Default to all the nodes of the cluster if no 
                # nodes were specified in the command:
                if 'nodes' in operation:
                    cmd = "{command} -node {hosts}".format(
                        command=operation['command'],
                        hosts=",".join(operation['nodes']))
                elif '-node' in operation['command']:
                    cmd = operation['command']
                else:
                    cmd = "{command} -node {hosts}".format(
                        command=operation['command'], 
                        hosts=",".join([n for n in fab_config['hosts']]))
                stats['command'] = cmd
                stats['intervals'] = []
                stats['test'] = '{operation_i}_{operation}'.format(
                    operation_i=operation_i, operation=cmd.strip().split(' ')[0]).replace(" ","_")
                logger.info('Running stress operation : {cmd}  ...'.format(cmd=cmd))
                # Run stress:
                # (stress takes the stats as a parameter, and adds
                #  more as it runs):
                stats = stress(cmd, revision, stats, stress_revision=revision_config.get('stress_revision', None))
                # Wait for all compactions to finish (unless disabled):
                if operation.get('wait_for_compaction', True):
                    compaction_throughput = revision_config.get("compaction_throughput_mb_per_sec", 16)
                    wait_for_compaction(compaction_throughput=compaction_throughput)

            elif operation['type'] == 'nodetool':
                if 'nodes' not in operation:
                    operation['nodes'] = 'all'
                if operation['nodes'] in ['all','ALL']:
                    nodes = [n for n in fab_config['hosts']]
                else:
                    nodes = operation['nodes']

                logger.info("Running nodetool on {nodes} with command: {command}".format(nodes=operation['nodes'], command=operation['command']))
                stats['command'] = operation['command']
                output = nodetool_multi(nodes, operation['command'])
                stats['output'] = output
                logger.info("Nodetool command finished on all nodes")

            elif operation['type'] == 'cqlsh':
                logger.info("Running cqlsh commands on {node}".format(node=operation['node']))
                output = cqlsh(operation['script'], operation['node'])
                stats['output'] = output.split("\n")
                logger.info("Cqlsh commands finished")

            elif operation['type'] == 'bash':
                nodes = operation.get('nodes', [n for n in fab_config['hosts']])
                logger.info("Running bash commands on {node}".format(nodes=nodes))
                output = bash(operation['script'], nodes)
                stats['output'] = output.split("\n")
                logger.info("Bash commands finished")


            end = datetime.datetime.now()
            stats['end_date'] = end.isoformat()
            stats['op_duration'] = str(end - start)
            log_stats(stats, file=log)

            #Copy node logs:
            logs_dir = os.path.join(os.path.expanduser('~'),'.cstar_perf','logs')
            log_dir = os.path.join(logs_dir, stats['id'])
            os.makedirs(log_dir)
            retrieve_logs(log_dir)
            revision_config['last_log'] = stats['id']
            #Tar them for archiving:
            subprocess.Popen(shlex.split('tar cfvz {id}.tar.gz {id}'.format(id=stats['id'])), cwd=logs_dir).communicate()
            shutil.rmtree(log_dir)

            if capture_fincore:
                stop_fincore_capture()
                retrieve_fincore_logs(log_dir)
                # Restart fincore capture if this is not the last
                # operation:
                if operation_i < len(operations):
                    start_fincore_capture(interval=10)

        log_add_data(log, {'title':title,
                           'subtitle': subtitle,
                           'revisions': revisions})

        if revisions[-1].get('leave_data', False):
            teardown(destroy=False, leave_data=True)
        else:
            teardown(destroy=True, leave_data=False)
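
Examples #3 and #5 call `retrieve_logs_and_create_tarball(job_id=...)`, while this older example inlines the equivalent steps. A hedged reconstruction of that helper from the inline code above, assuming `retrieve_logs` is in scope from the same module (the real version may use `CSTAR_PERF_LOGS_DIR` instead of a hard-coded path):

import os
import shlex
import shutil
import subprocess

def retrieve_logs_and_create_tarball(job_id):
    # Copy each node's logs into a per-job directory:
    logs_dir = os.path.join(os.path.expanduser('~'), '.cstar_perf', 'logs')
    log_dir = os.path.join(logs_dir, job_id)
    os.makedirs(log_dir)
    retrieve_logs(log_dir)
    # Tar the directory for archiving, then remove the originals:
    subprocess.Popen(shlex.split('tar cfvz {id}.tar.gz {id}'.format(id=job_id)),
                     cwd=logs_dir).communicate()
    shutil.rmtree(log_dir)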
Example #8
def bootstrap_cluster(cfg):
    config = copy.copy(pristine_config)
    config.update(cfg)

    git_id = bootstrap(config, destroy=True)
    return git_id
Example #9
def stress_compare(
    revisions,
    title,
    log,
    operations=[],
    subtitle="",
    capture_fincore=False,
    initial_destroy=True,
    leave_data=False,
    keep_page_cache=False,
):
    """
    Run Stress on multiple C* branches and compare them.

    revisions - List of dictionaries that contain cluster configurations
                to trial. This is combined with the default config.
    title - The title of the comparison
    subtitle - A subtitle for more information (displayed smaller underneath)
    log - The json file path to record stats to
    operations - List of dictionaries indicating the operations. Example:
       [# cassandra-stress command, node defaults to cluster defined 'stress_node'
        {'type': 'stress',
         'command': 'write n=19M -rate threads=50',
         'node': 'node1',
         'wait_for_compaction': True},
        # nodetool command to run in parallel on nodes:
        {'type': 'nodetool',
         'command': 'decommission',
         'nodes': ['node1','node2']},
        # cqlsh script, node defaults to cluster defined 'stress_node'
        {'type': 'cqlsh',
         'script': "use my_ks; INSERT INTO blah (col1, col2) VALUES (val1, val2);",
         'node': 'node1'}
       ]
    capture_fincore - Enables capturing of linux-fincore logs of C* data files.
    initial_destroy - Destroy all data before the first revision is run.
    leave_data - Whether to leave the Cassandra data/commitlog/etc directories intact between revisions.
    keep_page_cache - Whether to leave the linux page cache intact between revisions.
    """
    validate_revisions_list(revisions)
    validate_operations_list(operations)

    pristine_config = copy.copy(fab_config)

    # initial_destroy setting can be set in the job
    # configuration, or manually in the call to this function. Either
    # is fine, but they shouldn't conflict. If they do, ValueError is
    # raised.
    if initial_destroy == True and pristine_config.get("initial_destroy", None) == False:
        raise ValueError("setting for initial_destroy conflicts in job config and stress_compare() call")
    else:
        initial_destroy = pristine_config.get("initial_destroy", initial_destroy)

    if initial_destroy:
        logger.info("Cleaning up from prior runs of stress_compare ...")
        teardown(destroy=True, leave_data=False)

    # Update our local cassandra git remotes and branches
    _, localhost_entry = get_localhost()
    with common.fab.settings(hosts=[localhost_entry]):
        execute(cstar.update_cassandra_git)

    clean_stress()
    stress_revisions = {operation["stress_revision"] for operation in operations if "stress_revision" in operation}
    stress_shas = setup_stress(stress_revisions)

    for rev_num, revision_config in enumerate(revisions):
        config = copy.copy(pristine_config)
        config.update(revision_config)
        revision = revision_config["revision"]
        config["log"] = log
        config["title"] = title
        config["subtitle"] = subtitle
        product = dse if config["product"] == "dse" else cstar

        # leave_data setting can be set in the revision
        # configuration, or manually in the call to this function.
        # Either is fine, but they shouldn't conflict. If they do,
        # ValueError is raised.
        if leave_data == True and revision_config.get("leave_data", None) == False:
            raise ValueError("setting for leave_data conflicts in job config and stress_compare() call")
        else:
            leave_data = revision_config.get("leave_data", leave_data)

        logger.info("Bringing up {revision} cluster...".format(revision=revision))

        # Drop the page cache between each revision, especially
        # important when leave_data=True :
        if not keep_page_cache:
            drop_page_cache()

        # Only fetch from git on the first run:
        git_fetch = rev_num == 0
        revision_config["git_id"] = git_id = bootstrap(config, destroy=True, leave_data=leave_data, git_fetch=git_fetch)

        if capture_fincore:
            start_fincore_capture(interval=10)

        for operation_i, operation in enumerate(operations, 1):
            start = datetime.datetime.now()
            stats = {
                "id": str(uuid.uuid1()),
                "type": operation["type"],
                "revision": revision,
                "git_id": git_id,
                "start_date": start.isoformat(),
                "label": revision_config.get("label", revision_config["revision"]),
            }

            if operation["type"] == "stress":
                # Default to all the nodes of the cluster if no
                # nodes were specified in the command:
                if "nodes" in operation:
                    cmd = "{command} -node {hosts}".format(
                        command=operation["command"], hosts=",".join(operation["nodes"])
                    )
                elif "-node" in operation["command"]:
                    cmd = operation["command"]
                else:
                    cmd = "{command} -node {hosts}".format(
                        command=operation["command"], hosts=",".join([n for n in fab_config["hosts"]])
                    )
                stats["command"] = cmd
                stats["intervals"] = []
                stats["test"] = "{operation_i}_{operation}".format(
                    operation_i=operation_i, operation=cmd.strip().split(" ")[0]
                ).replace(" ", "_")
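                # e.g. operation 1 running "write n=19M ..." produces the
                # test label "1_write".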
                logger.info("Running stress operation : {cmd}  ...".format(cmd=cmd))
                # Run stress:
                # (stress takes the stats as a parameter, and adds
                #  more as it runs):
                stress_sha = stress_shas[operation.get("stress_revision", "default")]
                stats = stress(cmd, revision, stress_sha, stats=stats)
                # Wait for all compactions to finish (unless disabled):
                if operation.get("wait_for_compaction", True):
                    compaction_throughput = revision_config.get("compaction_throughput_mb_per_sec", 16)
                    wait_for_compaction(compaction_throughput=compaction_throughput)

            elif operation["type"] == "nodetool":
                if "nodes" not in operation:
                    operation["nodes"] = "all"
                if operation["nodes"] in ["all", "ALL"]:
                    nodes = list(fab_config["hosts"])
                else:
                    nodes = operation["nodes"]

                set_nodetool_path(os.path.join(product.get_bin_path(), "nodetool"))
                logger.info(
                    "Running nodetool on {nodes} with command: {command}".format(
                        nodes=operation["nodes"], command=operation["command"]
                    )
                )
                stats["command"] = operation["command"]
                output = nodetool_multi(nodes, operation["command"])
                stats["output"] = output
                logger.info("Nodetool command finished on all nodes")

            elif operation["type"] == "cqlsh":
                logger.info("Running cqlsh commands on {node}".format(node=operation["node"]))
                set_cqlsh_path(os.path.join(product.get_bin_path(), "cqlsh"))
                output = cqlsh(operation["script"], operation["node"])
                stats["output"] = output.split("\n")
                logger.info("Cqlsh commands finished")

            elif operation["type"] == "bash":
                nodes = operation.get("nodes", list(fab_config["hosts"]))
                logger.info("Running bash commands on: {nodes}".format(nodes=nodes))
                stats["output"] = bash(operation["script"], nodes)
                logger.info("Bash commands finished")

            end = datetime.datetime.now()
            stats["end_date"] = end.isoformat()
            stats["op_duration"] = str(end - start)
            log_stats(stats, file=log)

            # Copy node logs:
            logs_dir = os.path.join(os.path.expanduser("~"), ".cstar_perf", "logs")
            log_dir = os.path.join(logs_dir, stats["id"])
            os.makedirs(log_dir)
            retrieve_logs(log_dir)
            revision_config["last_log"] = stats["id"]
            # Tar them for archiving:
            subprocess.Popen(
                shlex.split("tar cfvz {id}.tar.gz {id}".format(id=stats["id"])), cwd=logs_dir
            ).communicate()
            shutil.rmtree(log_dir)

            if capture_fincore:
                stop_fincore_capture()
                retrieve_fincore_logs(log_dir)
                # Restart fincore capture if this is not the last
                # operation:
                if operation_i < len(operations):
                    start_fincore_capture(interval=10)

        log_add_data(log, {"title": title, "subtitle": subtitle, "revisions": revisions})

        if revisions[-1].get("leave_data", leave_data):
            teardown(destroy=False, leave_data=True)
        else:
            teardown(destroy=True, leave_data=False)
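
A minimal sketch of how this variant might be invoked; the revisions, labels, title, log path, and stress command below are illustrative placeholders, not values from the source:

stress_compare(
    revisions=[{'revision': 'apache/trunk'},
               {'revision': 'apache/cassandra-3.0', 'label': '3.0 branch'}],
    title='trunk vs 3.0 write throughput',
    subtitle='illustrative run',
    log='/tmp/stress_compare_stats.json',
    operations=[{'type': 'stress',
                 'command': 'write n=19M -rate threads=50',
                 'wait_for_compaction': True},
                {'type': 'nodetool',
                 'command': 'flush',
                 'nodes': 'all'}])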
Example No. 10
0
def stress_compare(revisions,
                   title,
                   log,
                   operations=[],
                   subtitle='',
                   capture_fincore=False,
                   initial_destroy=True,
                   leave_data=False,
                   keep_page_cache=False,
                   git_fetch_before_test=True,
                   bootstrap_before_test=True,
                   teardown_after_test=True):
    """
    Run Stress on multiple C* branches and compare them.

    revisions - List of dictionaries that contain cluster configurations
                to trial. This is combined with the default config.
    title - The title of the comparison
    subtitle - A subtitle for more information (displayed smaller underneath)
    log - The json file path to record stats to
    operations - List of dictionaries indicating the operations. Example:
       [# cassandra-stress command, node defaults to cluster defined 'stress_node'
        {'type': 'stress',
         'command': 'write n=19M -rate threads=50',
         'node': 'node1',
         'wait_for_compaction': True},
        # nodetool command to run in parallel on nodes:
        {'type': 'nodetool',
         'command': 'decommission',
         'nodes': ['node1','node2']},
        # cqlsh script, node defaults to cluster defined 'stress_node'
        {'type': 'cqlsh',
         'script': "use my_ks; INSERT INTO blah (col1, col2) VALUES (val1, val2);",
         'node': 'node1'}
       ]
    capture_fincore - Enables capturing of linux-fincore logs of C* data files.
    initial_destroy - Destroy all data before the first revision is run.
    leave_data - Whether to leave the Cassandra data/commitlog/etc directories intact between revisions.
    keep_page_cache - Whether to leave the linux page cache intact between revisions.
    git_fetch_before_test - Whether to fetch cassandra.git (the remotes defined in fab_common.git_repos) before the test.
    bootstrap_before_test - Whether to bootstrap DSE / C* before running the operations.
    teardown_after_test - Whether to shut down DSE / C* after all of the operations.
    """
    validate_revisions_list(revisions)
    validate_operations_list(operations)

    pristine_config = copy.copy(fab_config)

    # initial_destroy and git_fetch_before_test can be set in the job configuration,
    # or manually in the call to this function.
    # Either is fine, but they shouldn't conflict. If they do, a ValueError is raised.
    initial_destroy = get_bool_if_method_and_config_values_do_not_conflict(
        'initial_destroy',
        initial_destroy,
        pristine_config,
        method_name='stress_compare')

    if initial_destroy:
        logger.info("Cleaning up from prior runs of stress_compare ...")
        teardown(destroy=True, leave_data=False)

    # https://datastax.jira.com/browse/CSTAR-633
    git_fetch_before_test = get_bool_if_method_and_config_values_do_not_conflict(
        'git_fetch_before_test',
        git_fetch_before_test,
        pristine_config,
        method_name='stress_compare')

    stress_shas = maybe_update_cassandra_git_and_setup_stress(
        operations, git_fetch=git_fetch_before_test)

    # Flamegraph Setup
    if flamegraph.is_enabled():
        execute(flamegraph.setup)

    with GracefulTerminationHandler() as handler:
        for rev_num, revision_config in enumerate(revisions):
            config = copy.copy(pristine_config)
            config.update(revision_config)
            revision = revision_config['revision']
            config['log'] = log
            config['title'] = title
            config['subtitle'] = subtitle
            product = dse if config.get('product') == 'dse' else cstar

            # leave_data, bootstrap_before_test, and teardown_after_test can be set in the job configuration,
            # or manually in the call to this function.
            # Either is fine, but they shouldn't conflict. If they do, a ValueError is raised.
            leave_data = get_bool_if_method_and_config_values_do_not_conflict(
                'leave_data',
                leave_data,
                revision_config,
                method_name='stress_compare')

            # https://datastax.jira.com/browse/CSTAR-638
            bootstrap_before_test = get_bool_if_method_and_config_values_do_not_conflict(
                'bootstrap_before_test',
                bootstrap_before_test,
                revision_config,
                method_name='stress_compare')

            # https://datastax.jira.com/browse/CSTAR-639
            teardown_after_test = get_bool_if_method_and_config_values_do_not_conflict(
                'teardown_after_test',
                teardown_after_test,
                revision_config,
                method_name='stress_compare')

            logger.info(
                "Bringing up {revision} cluster...".format(revision=revision))

            # Drop the page cache between each revision, especially
            # important when leave_data=True :
            if not keep_page_cache:
                drop_page_cache()

            # Only fetch from git on the first run and if git_fetch_before_test is True
            git_fetch_before_bootstrap = (rev_num == 0 and git_fetch_before_test)
            if bootstrap_before_test:
                revision_config['git_id'] = git_id = bootstrap(
                    config,
                    destroy=initial_destroy,
                    leave_data=leave_data,
                    git_fetch=git_fetch_before_bootstrap)
            else:
                revision_config['git_id'] = git_id = config['revision']

            if flamegraph.is_enabled(revision_config):
                execute(flamegraph.ensure_stopped_perf_agent)
                execute(flamegraph.start_perf_agent, rev_num)

            if capture_fincore:
                start_fincore_capture(interval=10)

            last_stress_operation_id = 'None'
            for operation_i, operation in enumerate(operations, 1):
                try:
                    start = datetime.datetime.now()
                    stats = {
                        "id": str(uuid.uuid1()),
                        "type": operation['type'],
                        "revision": revision,
                        "git_id": git_id,
                        "start_date": start.isoformat(),
                        "label": revision_config.get('label', revision_config['revision']),
                        "test": '{operation_i}_{operation}'.format(
                            operation_i=operation_i, operation=operation['type'])
                    }
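                    # uuid1() yields a time-based id; it also names the log
                    # tarball created in the finally block below.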

                    if operation['type'] == 'stress':
                        last_stress_operation_id = stats['id']
                        # Default to all the nodes of the cluster if no
                        # nodes were specified in the command:
                        if 'nodes' in operation:
                            cmd = "{command} -node {hosts}".format(
                                command=operation['command'],
                                hosts=",".join(operation['nodes']))
                        elif '-node' in operation['command']:
                            cmd = operation['command']
                        else:
                            cmd = "{command} -node {hosts}".format(
                                command=operation['command'],
                                hosts=",".join(
                                    [n for n in fab_config['hosts']]))
                        stats['command'] = cmd
                        stats['intervals'] = []
                        stats['test'] = '{operation_i}_{operation}'.format(
                            operation_i=operation_i,
                            operation=cmd.strip().split(' ')[0]).replace(" ", "_")
                        logger.info(
                            'Running stress operation : {cmd}  ...'.format(
                                cmd=cmd))
                        # Run stress:
                        # (stress takes the stats as a parameter, and adds
                        #  more as it runs):
                        stress_sha = stress_shas[operation.get(
                            'stress_revision', 'default')]
                        stats = stress(cmd, revision, stress_sha, stats=stats)
                        # Wait for all compactions to finish (unless disabled):
                        if operation.get('wait_for_compaction', True):
                            compaction_throughput = revision_config.get(
                                "compaction_throughput_mb_per_sec", 16)
                            wait_for_compaction(
                                compaction_throughput=compaction_throughput)

                    elif operation['type'] == 'nodetool':
                        if 'nodes' not in operation:
                            operation['nodes'] = 'all'
                        if operation['nodes'] in ['all', 'ALL']:
                            nodes = list(fab_config['hosts'])
                        else:
                            nodes = operation['nodes']

                        set_nodetool_path(
                            os.path.join(product.get_bin_path(), 'nodetool'))
                        logger.info(
                            "Running nodetool on {nodes} with command: {command}"
                            .format(nodes=operation['nodes'],
                                    command=operation['command']))
                        stats['command'] = operation['command']
                        output = nodetool_multi(nodes, operation['command'])
                        stats['output'] = output
                        logger.info("Nodetool command finished on all nodes")

                    elif operation['type'] == 'cqlsh':
                        logger.info("Running cqlsh commands on {node}".format(
                            node=operation['node']))
                        set_cqlsh_path(
                            os.path.join(product.get_bin_path(), 'cqlsh'))
                        output = cqlsh(operation['script'], operation['node'])
                        stats['output'] = output.split("\n")
                        stats['command'] = operation['script']
                        logger.info("Cqlsh commands finished")

                    elif operation['type'] == 'bash':
                        nodes = operation.get('nodes', list(fab_config['hosts']))
                        logger.info("Running bash commands on: {nodes}".format(
                            nodes=nodes))
                        stats['output'] = bash(operation['script'], nodes)
                        stats['command'] = operation['script']
                        logger.info("Bash commands finished")

                    elif operation['type'] == 'spark_cassandra_stress':
                        nodes = operation.get('nodes', list(fab_config['hosts']))
                        stress_node = config.get('stress_node', None)
                        # Note: once we have https://datastax.jira.com/browse/CSTAR-617, we should fix this to use
                        # client-tool when DSE_VERSION >= 4.8.0
                        # https://datastax.jira.com/browse/DSP-6025: dse client-tool
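                        # Pull the master URL (e.g. spark://10.0.0.1:7077) out of
                        # the dsetool output; the leading (.|\n)* lets .match()
                        # find it anywhere in the multi-line output.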
                        master_regex = re.compile(
                            r"(.|\n)*(?P<master>spark://\d+\.\d+\.\d+\.\d+:\d+)(.|\n)*")
                        master_out = dsetool_cmd(
                            nodes[0], options='sparkmaster')[nodes[0]]
                        master_match = master_regex.match(master_out)
                        if not master_match:
                            raise ValueError(
                                'Could not find master address from "dsetool sparkmaster" cmd\n'
                                'Found output: {f}'.format(f=master_out))
                        master_string = master_match.group('master')
                        build_spark_cassandra_stress = bool(distutils.util.strtobool(
                            str(operation.get('build_spark_cassandra_stress', 'True'))))
                        remove_existing_spark_data = bool(distutils.util.strtobool(
                            str(operation.get('remove_existing_spark_data', 'True'))))
                        logger.info(
                            "Running spark_cassandra_stress on {stress_node} "
                            "using spark.cassandra.connection.host={node} and "
                            "spark-master {master}".format(
                                stress_node=stress_node,
                                node=nodes[0],
                                master=master_string))
                        output = spark_cassandra_stress(
                            operation['script'],
                            nodes,
                            stress_node=stress_node,
                            master=master_string,
                            build_spark_cassandra_stress=build_spark_cassandra_stress,
                            remove_existing_spark_data=remove_existing_spark_data)
                        stats['output'] = output.get('output', 'No output captured')
                        stats['spark_cass_stress_time_in_seconds'] = output.get(
                            'stats', {}).get('TimeInSeconds', 'No time captured')
                        stats['spark_cass_stress_ops_per_second'] = output.get(
                            'stats', {}).get('OpsPerSecond', 'No ops/s captured')
                        logger.info("spark_cassandra_stress finished")

                    elif operation['type'] == 'ctool':
                        logger.info(
                            "Running ctool with parameters: {command}".format(
                                command=operation['command']))
                        ctool = Ctool(operation['command'], common.config)
                        output = execute(ctool.run)
                        stats['output'] = output
                        logger.info("ctool finished")

                    elif operation['type'] == 'dsetool':
                        if 'nodes' not in operation:
                            operation['nodes'] = 'all'
                        if operation['nodes'] in ['all', 'ALL']:
                            nodes = list(fab_config['hosts'])
                        else:
                            nodes = operation['nodes']

                        dsetool_options = operation['script']
                        logger.info(
                            "Running dsetool {command} on {nodes}".format(
                                nodes=operation['nodes'],
                                command=dsetool_options))
                        stats['command'] = dsetool_options
                        output = dsetool_cmd(nodes=nodes,
                                             options=dsetool_options)
                        stats['output'] = output
                        logger.info("dsetool command finished on all nodes")

                    elif operation['type'] == 'dse':
                        logger.info("Running dse command on {node}".format(
                            node=operation['node']))
                        output = dse_cmd(node=operation['node'],
                                         options=operation['script'])
                        stats['output'] = output.split("\n")
                        stats['command'] = operation['script']
                        logger.info("dse commands finished")

                    end = datetime.datetime.now()
                    stats['end_date'] = end.isoformat()
                    stats['op_duration'] = str(end - start)
                    log_stats(stats, file=log)
                finally:
                    # Copy node logs:
                    retrieve_logs_and_create_tarball(job_id=stats['id'])
                    revision_config['last_log'] = stats['id']

                if capture_fincore:
                    stop_fincore_capture()
                    log_dir = os.path.join(CSTAR_PERF_LOGS_DIR, stats['id'])
                    retrieve_fincore_logs(log_dir)
                    # Restart fincore capture if this is not the last
                    # operation:
                    if operation_i < len(operations):
                        start_fincore_capture(interval=10)

            if flamegraph.is_enabled(revision_config):
                # Generate and Copy node flamegraphs
                execute(flamegraph.stop_perf_agent)
                execute(flamegraph.generate_flamegraph, rev_num)
                flamegraph_dir = os.path.join(os.path.expanduser('~'),
                                              '.cstar_perf', 'flamegraph')
                flamegraph_test_dir = os.path.join(flamegraph_dir,
                                                   last_stress_operation_id)
                retrieve_flamegraph(flamegraph_test_dir, rev_num + 1)
                sh.tar('cfvz',
                       "{}.tar.gz".format(stats['id']),
                       last_stress_operation_id,
                       _cwd=flamegraph_dir)
                shutil.rmtree(flamegraph_test_dir)

            log_add_data(log, {
                'title': title,
                'subtitle': subtitle,
                'revisions': revisions
            })
            if teardown_after_test:
                if revisions[-1].get('leave_data', leave_data):
                    teardown(destroy=False, leave_data=True)
                else:
                    kill_delay = 300 if profiler.yourkit_is_enabled(revision_config) else 0
                    teardown(destroy=True,
                             leave_data=False,
                             kill_delay=kill_delay)

            if profiler.yourkit_is_enabled(revision_config):
                yourkit_config = profiler.yourkit_get_config()
                yourkit_dir = os.path.join(os.path.expanduser('~'),
                                           '.cstar_perf', 'yourkit')
                yourkit_test_dir = os.path.join(yourkit_dir,
                                                last_stress_operation_id)
                retrieve_yourkit(yourkit_test_dir, rev_num + 1)
                sh.tar('cfvz',
                       "{}.tar.gz".format(stats['id']),
                       last_stress_operation_id,
                       _cwd=yourkit_dir)
                shutil.rmtree(yourkit_test_dir)
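
The variants above call get_bool_if_method_and_config_values_do_not_conflict without showing its body. Below is a minimal sketch of what such a helper could look like, reconstructed from the inline checks in the earlier example; the real helper in cstar_perf may differ:

import distutils.util

def get_bool_if_method_and_config_values_do_not_conflict(param_name, method_value,
                                                         config, method_name):
    # Hypothetical reconstruction, not the actual cstar_perf implementation.
    # Normalize the config value (bools, or strings like 'false') to a bool.
    config_value = config.get(param_name, None)
    if config_value is not None:
        config_value = bool(distutils.util.strtobool(str(config_value)))
    # Mirror the inline check: an explicit True at the call site combined
    # with an explicit False in the job config is ambiguous, so refuse to guess.
    if method_value is True and config_value is False:
        raise ValueError('setting for {param} conflicts in job config and '
                         '{method}() call'.format(param=param_name,
                                                  method=method_name))
    return method_value if config_value is None else config_value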