Example #1
0
def resource_usage(bcbio_log, cluster, rawdir, verbose):
    """Collect per-host collectl statistics for a bcbio run.

    Every ``*.raw.gz`` file in ``rawdir`` that falls inside the time frame
    obtained from the bcbio log is loaded with ``load_collectl``; the
    results are grouped by host name. The host name is the file name with
    its trailing ``-<8 digits>-<6 digits>.raw.gz`` suffix removed.

    :param bcbio_log:   local path to the bcbio log file written by the run
    :param cluster:     not used by this function
    :param rawdir:      directory holding the raw collectl data files
    :param verbose:     not used by this function

    :return: a tuple ``(data_frames, hardware_info, steps)`` where
             ``data_frames`` maps each host to its loaded data (several
             files for one host are joined with ``pandas.concat``),
             ``hardware_info`` maps each host to the hardware description
             returned for the last file loaded for that host, and ``steps``
             is the ``steps`` attribute of the log time frame.
    :type return: tuple
    """
    time_frame = log_time_frame(bcbio_log)
    frames_by_host = {}
    hardware_by_host = {}

    raw_names = [
        name for name in sorted(os.listdir(rawdir))
        if name.endswith('.raw.gz')
    ]

    for raw_name in raw_names:
        # Skip files whose name places them outside the sampling time range
        # gathered from the bcbio log.
        if not rawfile_within_timeframe(raw_name, time_frame):
            continue

        samples, hardware = load_collectl(
            os.path.join(rawdir, raw_name),
            time_frame.start,
            time_frame.end)

        # A file without samples is skipped without reporting an error.
        if len(samples) == 0:
            continue

        host = re.sub(r'-\d{8}-\d{6}\.raw\.gz$', '', raw_name)
        hardware_by_host[host] = hardware
        if host in frames_by_host:
            frames_by_host[host] = pd.concat([frames_by_host[host], samples])
        else:
            frames_by_host[host] = samples

    return (frames_by_host, hardware_by_host, time_frame.steps)
Example #2
0
def generate_graphs(collectl_datadir, bcbio_log_path, outdir, verbose=False):
    """Generate all graphs for a bcbio run.

    Step timings are read from the bcbio log, every ``*.raw.gz`` file in
    ``collectl_datadir`` is loaded for the span between the earliest and
    latest timing key, and five PNG files are written per host into
    ``outdir``: ``<host>_cpu.png``, ``<host>_net_bytes.png``,
    ``<host>_net_pkts.png``, ``<host>_memory.png`` and
    ``<host>_disk_io.png``.

    :param collectl_datadir: directory holding the raw collectl data files
    :param bcbio_log_path:   path to the bcbio log file
    :param outdir:           directory the PNG files are written to
    :param verbose:          print progress messages when true

    :return: None
    """
    if verbose:
        print('Reading timings from bcbio log...')
    steps = get_bcbio_timings(bcbio_log_path)
    start_time = min(steps.keys())
    end_time = max(steps.keys())

    if verbose:
        print('Parsing performance data...')

    dfs = {}
    hardware_info = {}
    for item in sorted(os.listdir(collectl_datadir)):
        if not item.endswith('.raw.gz'):
            continue

        df, hardware = load_collectl(
            os.path.join(collectl_datadir, item), start_time, end_time)
        if len(df) == 0:
            continue

        # The host name is the file name without its trailing
        # "-<8 digits>-<6 digits>.raw.gz" suffix.
        host = re.sub(r'-\d{8}-\d{6}\.raw\.gz$', '', item)
        hardware_info[host] = hardware
        if host in dfs:
            dfs[host] = pd.concat([dfs[host], df])
        else:
            dfs[host] = df

    # dict.iteritems() only exists on Python 2; items() works on 2 and 3.
    for host, df in dfs.items():
        if verbose:
            print('Generating CPU graph for {}...'.format(host))
        graph = graph_cpu(df, steps, hardware_info[host]['num_cpus'])
        _save_graph(graph, outdir, host, 'cpu')

        ifaces = _device_names(df, ('eth', 'ib'))

        if verbose:
            print('Generating network graphs for {}...'.format(host))
        graph = graph_net_bytes(df, steps, ifaces)
        _save_graph(graph, outdir, host, 'net_bytes')

        graph = graph_net_pkts(df, steps, ifaces)
        _save_graph(graph, outdir, host, 'net_pkts')

        if verbose:
            print('Generating memory graph for {}...'.format(host))
        graph = graph_memory(df, steps, hardware_info[host]["memory"])
        _save_graph(graph, outdir, host, 'memory')

        if verbose:
            print('Generating storage I/O graph for {}...'.format(host))
        drives = _device_names(df, ('sd', 'vd', 'hd', 'xvd'))
        graph = graph_disk_io(df, steps, drives)
        _save_graph(graph, outdir, host, 'disk_io')


def _save_graph(graph, outdir, host, suffix):
    """Save the figure of ``graph`` as ``<outdir>/<host>_<suffix>.png``."""
    graph.get_figure().savefig(
        os.path.join(outdir, '{}_{}.png'.format(host, suffix)),
        bbox_inches='tight', pad_inches=0.25)
    # Close the current pylab figure once it has been written out.
    pylab.close()


def _device_names(df, prefixes):
    """Return the device names among the keys of ``df``.

    A key is selected when it starts with one of ``prefixes``; the device
    name is the part of the key before its first underscore.
    """
    return {
        series.split('_')[0]
        for series in df.keys()
        if series.startswith(prefixes)
    }