Example #1
0
def analyse_2d_density(
        exp_list='experiments_completed.txt',
        res_dir='',
        out_dir='',
        source_filter='',
        min_values='3',
        xmetric='throughput',
        ymetric='tcprtt',
        variables='',
        out_name='',
        xmin='0',
        xmax='0',
        ymin='0',
        ymax='0',
        lnames='',
        group_by='aqm',
        replot_only='0',
        pdf_dir='',
        stime='0.0',
        etime='0.0',
        ts_correct='1',
        smoothed='1',
        link_len='0',
        plot_params='',
        plot_script='',
        xstat_index='',
        ystat_index='',
        dupacks='0',
        cum_ackseq='1',
        merge_data='0',
        #sburst='1', eburst='0', test_id_prefix='[0-9]{8}\-[0-9]{6}_experiment_',
        sburst='1',
        eburst='0',
        test_id_prefix=r'exp_[0-9]{8}\-[0-9]{6}_',
        slowest_only='0',
        query_host=''):
    """2d density / ellipse plot of ymetric versus xmetric for different
    experiments.

    All parameters are strings because this is a command-line task; flag-like
    options use '0'/'1'.

    exp_list:       file containing the list of experiment ids to process
    res_dir:        directory with already extracted data; if empty, the
                    extract functions are run first
    out_dir:        directory the extract functions write their output to
    source_filter:  semicolon-separated list of source filters (at most 12)
    min_values:     minimum number of rows a data file must have to be plotted
    xmetric:        metric plotted on the x-axis
    ymetric:        metric plotted on the y-axis
    variables:      restriction on which experiments to plot, used to build
                    the experiment match strings
    out_name:       prefix prepended to the output file name
    xmin, xmax,
    ymin, ymax:     axis limits, passed through to the plot function
    lnames:         semicolon-separated legend names; if set, the count must
                    equal the number of groups found
    group_by:       semicolon-separated experiment variables used to group
                    the data (an experiment is dropped if a variable is absent
                    from its id)
    replot_only:    passed through to the extract functions
    pdf_dir:        output directory for the plot (defaults to res_dir)
    stime, etime:   start/end time, passed through to the plot function
    ts_correct,
    smoothed,
    link_len,
    xstat_index,
    ystat_index,
    dupacks,
    cum_ackseq,
    sburst, eburst,
    slowest_only,
    query_host:     passed through to get_metric_params / the extract
                    functions (see those helpers for semantics)
    merge_data:     '1' = merge the per-responder data files into one file
                    per experiment and plot the merged data
    plot_params,
    plot_script:    passed through to the plot function
    test_id_prefix: regex matching the experiment test id prefix
    """

    test_id_pfx = ''

    # fail early if either metric is unknown
    check = get_metric_params(xmetric, smoothed, ts_correct)
    if check is None:
        abort('Unknown metric %s specified with xmetric' % xmetric)
    check = get_metric_params(ymetric, smoothed, ts_correct)
    if check is None:
        abort('Unknown metric %s specified with ymetric' % ymetric)

    if len(source_filter.split(';')) > 12:
        abort('Cannot have more than 12 filters')

    # XXX more param checking

    # make sure res_dir has valid form (out_dir is handled by extract methods)
    res_dir = valid_dir(res_dir)

    # Initialise source filter data structure
    sfil = SourceFilter(source_filter)

    # read test ids
    experiments = read_experiment_ids(exp_list)

    # get path based on first experiment id
    dir_name = get_first_experiment_path(experiments)

    # if we haven't got the extracted data, run the extract method(s) first
    # (once for the x metric and once for the y metric of each experiment)
    if res_dir == '':
        for experiment in experiments:

            (ex_function,
             kwargs) = get_extract_function(xmetric,
                                            link_len,
                                            xstat_index,
                                            sburst=sburst,
                                            eburst=eburst,
                                            slowest_only=slowest_only,
                                            query_host=query_host)

            (dummy, out_files,
             out_groups) = ex_function(test_id=experiment,
                                       out_dir=out_dir,
                                       source_filter=source_filter,
                                       replot_only=replot_only,
                                       ts_correct=ts_correct,
                                       **kwargs)

            (ex_function,
             kwargs) = get_extract_function(ymetric,
                                            link_len,
                                            ystat_index,
                                            sburst=sburst,
                                            eburst=eburst,
                                            slowest_only=slowest_only,
                                            query_host=query_host)

            (dummy, out_files,
             out_groups) = ex_function(test_id=experiment,
                                       out_dir=out_dir,
                                       source_filter=source_filter,
                                       replot_only=replot_only,
                                       ts_correct=ts_correct,
                                       **kwargs)

        # a relative out_dir is interpreted relative to the first experiment's
        # directory
        if out_dir == '' or out_dir[0] != '/':
            res_dir = dir_name + '/' + out_dir
        else:
            res_dir = out_dir

    else:
        if res_dir[0] != '/':
            res_dir = dir_name + '/' + res_dir

    # make sure we have trailing slash
    res_dir = valid_dir(res_dir)

    if pdf_dir == '':
        pdf_dir = res_dir
    else:
        if pdf_dir[0] != '/':
            pdf_dir = dir_name + '/' + pdf_dir
        pdf_dir = valid_dir(pdf_dir)
        # if pdf_dir specified create if it doesn't exist
        mkdir_p(pdf_dir)

    #
    # build match string from variables
    #

    (match_str, match_str2) = build_match_strings(experiments[0], variables,
                                                  test_id_prefix)

    #
    # filter out the experiments to plot, generate x-axis labels, get test id prefix
    #

    (fil_experiments, test_id_pfx,
     dummy) = filter_experiments(experiments, match_str, match_str2)

    #
    # get groups based on group_by variable
    #

    group_idx = 1
    levels = {}        # maps level string -> group number
    groups = []        # group number for each data file
    leg_names = []     # legend name for each group
    _experiments = []
    for experiment in fil_experiments:
        level = ''
        add_exp = True
        for g in group_by.split(';'):
            p = experiment.find(g)
            if p > -1:
                # the value follows the variable name after an underscore and
                # is terminated by the next underscore
                # NOTE(review): if no trailing '_' exists, find() returns -1
                # and the last character of the value is dropped — presumably
                # ids always have a trailing component; verify against callers
                s = experiment.find('_', p)
                s += 1
                e = experiment.find('_', s)
                level += g + ':' + experiment[s:e] + ' '
            else:
                # experiment doesn't contain this group_by variable, skip it
                add_exp = False
                break

        # remove the final space from the string
        level = level[:-1]

        if add_exp:
            _experiments.append(experiment)

            if level not in levels:
                levels[level] = group_idx
                group_idx += 1
                leg_names.append(level)

            if merge_data == '1':
                # one merged file per experiment
                groups.append(levels[level])
            else:
                # one file per source filter per experiment
                for i in range(len(source_filter.split(';'))):
                    groups.append(levels[level])

    fil_experiments = _experiments

    #
    # get metric parameters and list of data files
    #

    # get the metric parameter for both x and y
    x_axis_params = get_metric_params(xmetric, smoothed, ts_correct,
                                      xstat_index, dupacks, cum_ackseq,
                                      slowest_only)
    y_axis_params = get_metric_params(ymetric, smoothed, ts_correct,
                                      ystat_index, dupacks, cum_ackseq,
                                      slowest_only)

    x_ext = x_axis_params[0]
    y_ext = y_axis_params[0]

    # if we merge responders make sure we only use the merged files
    if merge_data == '1':
        # reset source filter so we match the merged file
        sfil.clear()
        sfil = SourceFilter('S_0.0.0.0_0')

    x_files = []
    y_files = []
    for experiment in fil_experiments:
        _x_files = []
        _y_files = []
        _x_ext = x_ext
        _y_ext = y_ext

        _files = get_testid_file_list('', experiment, _x_ext, 'LC_ALL=C sort',
                                      res_dir)
        if merge_data == '1':
            _x_ext += '.all'
            _files = merge_data_files(_files)
        _x_files += _files

        _files = get_testid_file_list('', experiment, _y_ext, 'LC_ALL=C sort',
                                      res_dir)
        if merge_data == '1':
            _y_ext += '.all'
            _files = merge_data_files(_files)
        _y_files += _files

        # group(1) is the flow tuple used by the source filter
        match_str = r'.*_([0-9\.]*_[0-9]*_[0-9\.]*_[0-9]*)[0-9a-z_.]*' + _x_ext
        for f in _x_files:
            res = re.search(match_str, f)
            if res and sfil.is_in(res.group(1)):
                # only add file if enough data points
                rows = int(
                    local('wc -l %s | awk \'{ print $1 }\'' % f, capture=True))
                if rows > int(min_values):
                    x_files.append(f)

        match_str = r'.*_([0-9\.]*_[0-9]*_[0-9\.]*_[0-9]*)[0-9a-z_.]*' + _y_ext
        for f in _y_files:
            res = re.search(match_str, f)
            if res and sfil.is_in(res.group(1)):
                # only add file if enough data points
                rows = int(
                    local('wc -l %s | awk \'{ print $1 }\'' % f, capture=True))
                if rows > int(min_values):
                    y_files.append(f)

    yindexes = [str(x_axis_params[2]), str(y_axis_params[2])]
    yscalers = [str(x_axis_params[3]), str(y_axis_params[3])]
    aggr_flags = [x_axis_params[5], y_axis_params[5]]
    diff_flags = [x_axis_params[6], y_axis_params[6]]

    # user-supplied legend names override the auto-generated group names
    if lnames != '':
        lnames_arr = lnames.split(';')
        if len(lnames_arr) != len(leg_names):
            abort(
                'Number of legend names must be equal to the number of source filters'
            )
        leg_names = lnames_arr

    print(x_files)
    print(y_files)
    print(groups)
    print(leg_names)

    #
    # pass the data files and auxilary info to plot function
    #

    if out_name != '':
        oprefix = out_name + '_' + test_id_pfx + '_' + xmetric + '_' + ymetric
    else:
        oprefix = test_id_pfx + '_' + xmetric + '_' + ymetric
    title = oprefix

    plot_2d_density(title, x_files, y_files, x_axis_params[1],
                    y_axis_params[1], yindexes, yscalers, 'pdf', oprefix,
                    pdf_dir, x_axis_params[4], y_axis_params[4], aggr_flags,
                    diff_flags, xmin, xmax, ymin, ymax, stime, etime, groups,
                    leg_names, plot_params, plot_script)

    # done
    puts('\n[MAIN] COMPLETED analyse_2d_density %s \n' % test_id_pfx)
Example #2
0
def analyse_cmpexp(
        exp_list='experiments_completed.txt',
        res_dir='',
        out_dir='',
        source_filter='',
        min_values='3',
        omit_const='0',
        metric='throughput',
        ptype='box',
        variables='',
        out_name='',
        ymin='0',
        ymax='0',
        lnames='',
        group_by_prefix='0',
        omit_const_xlab_vars='0',
        replot_only='0',
        pdf_dir='',
        stime='0.0',
        etime='0.0',
        ts_correct='1',
        smoothed='1',
        link_len='0',
        plot_params='',
        plot_script='',
        stat_index='',
        dupacks='0',
        cum_ackseq='1',
        merge_data='0',
        sburst='1',
        #eburst='0', test_id_prefix='[0-9]{8}\-[0-9]{6}_experiment_',
        eburst='0',
        test_id_prefix=r'exp_[0-9]{8}\-[0-9]{6}_',
        slowest_only='0',
        res_time_mode='0',
        query_host=''):
    """Compare one metric across different experiments as box, mean or
    median plots.

    All parameters are strings because this is a command-line task; flag-like
    options use '0'/'1'.

    exp_list:       file containing the list of experiment ids to process
    res_dir:        directory with already extracted data; if empty, the
                    extract function is run first
    out_dir:        directory the extract function writes its output to
    source_filter:  semicolon-separated list of source filters (at most 12,
                    at least one required)
    min_values:     minimum number of rows a data file must have to be used
    omit_const:     passed through to the plot function
    metric:         metric to compare
    ptype:          plot type: 'box', 'mean' or 'median'
    variables:      restriction on which experiments to plot, used to build
                    the experiment match strings
    out_name:       prefix prepended to the output file name
    ymin, ymax:     y-axis limits, passed through to the plot function
    lnames:         semicolon-separated legend names; if set, the count must
                    equal the number of legend entries
    group_by_prefix:'1' = group data by test id prefix (and flow) instead of
                    by flow only
    omit_const_xlab_vars: '1' = drop variables that never change from the
                    x-axis labels (needs at least 2 labels)
    replot_only:    passed through to the extract function
    pdf_dir:        output directory for the plot (defaults to res_dir)
    stime, etime:   start/end time, passed through to the plot function
    ts_correct,
    smoothed,
    link_len,
    stat_index,
    dupacks,
    cum_ackseq,
    sburst, eburst,
    slowest_only,
    query_host:     passed through to get_metric_params / the extract
                    function (see those helpers for semantics)
    merge_data:     '1' = merge the per-responder data files into one file
                    per experiment and plot the merged data
    plot_params,
    plot_script:    passed through to the plot function
    test_id_prefix: regex matching the experiment test id prefix
    res_time_mode:  only meaningful for metric 'restime':
                    '1' = show nominal response times,
                    '2' = plot ratio of median/mean to nominal response time
                    (not available for ptype 'box')
    """

    if ptype != 'box' and ptype != 'mean' and ptype != 'median':
        abort('ptype must be either box, mean or median')

    # fail early if the metric is unknown
    check = get_metric_params(metric, smoothed, ts_correct)
    if check is None:
        abort('Unknown metric %s specified' % metric)

    if source_filter == '':
        abort('Must specify at least one source filter')

    if len(source_filter.split(';')) > 12:
        abort('Cannot have more than 12 filters')

    # prevent wrong use of res_time_mode
    if metric != 'restime' and res_time_mode != '0':
        res_time_mode = '0'
    if ptype == 'box' and res_time_mode == '2':
        res_time_mode = '0'

    # XXX more param checking

    # Initialise source filter data structure
    sfil = SourceFilter(source_filter)

    # read test ids
    experiments = read_experiment_ids(exp_list)

    # get path based on first experiment id
    dir_name = get_first_experiment_path(experiments)

    # if we haven't got the extracted data, run the extract method(s) first
    if res_dir == '':
        for experiment in experiments:

            (ex_function,
             kwargs) = get_extract_function(metric,
                                            link_len,
                                            stat_index,
                                            sburst=sburst,
                                            eburst=eburst,
                                            slowest_only=slowest_only,
                                            query_host=query_host)

            (dummy, out_files,
             out_groups) = ex_function(test_id=experiment,
                                       out_dir=out_dir,
                                       source_filter=source_filter,
                                       replot_only=replot_only,
                                       ts_correct=ts_correct,
                                       **kwargs)

        # a relative out_dir is interpreted relative to the first experiment's
        # directory
        if out_dir == '' or out_dir[0] != '/':
            res_dir = dir_name + '/' + out_dir
        else:
            res_dir = out_dir
    else:
        if res_dir[0] != '/':
            res_dir = dir_name + '/' + res_dir

    # make sure we have trailing slash
    res_dir = valid_dir(res_dir)

    if pdf_dir == '':
        pdf_dir = res_dir
    else:
        if pdf_dir[0] != '/':
            pdf_dir = dir_name + '/' + pdf_dir
        pdf_dir = valid_dir(pdf_dir)
        # if pdf_dir specified create if it doesn't exist
        mkdir_p(pdf_dir)

    #
    # build match string from variables
    #

    (match_str, match_str2) = build_match_strings(experiments[0], variables,
                                                  test_id_prefix)

    #
    # filter out the experiments to plot, generate x-axis labels, get test id prefix
    #

    (fil_experiments, test_id_pfx,
     xlabs) = filter_experiments(experiments, match_str, match_str2)

    #
    # get out data files based on filtered experiment list and source_filter
    #

    (ext, ylab, yindex, yscaler, sep, aggr,
     diff) = get_metric_params(metric, smoothed, ts_correct, stat_index,
                               dupacks, cum_ackseq, slowest_only)

    if res_time_mode == '1':
        plot_params += ' NOMINAL_RES_TIME="1"'
    if res_time_mode == '2':
        if ptype == 'median':
            ylab = 'Median resp time / nominal resp time'
        elif ptype == 'mean':
            ylab = 'Mean resp time / nominal resp time'
        plot_params += ' RATIO_RES_TIME="1"'

    leg_names = source_filter.split(';')

    # if we merge responders make sure we only use the merged files
    if merge_data == '1':
        # set label to indicate merged data
        leg_names = ['Merged data']
        # reset source filter so we match the merged file
        sfil.clear()
        source_filter = 'S_0.0.0.0_0'
        sfil = SourceFilter(source_filter)

    file_names = []
    for experiment in fil_experiments:
        out_files = {}
        _ext = ext

        files = get_testid_file_list('', experiment, '%s' % _ext,
                                     'LC_ALL=C sort', res_dir)
        if merge_data == '1':
            # change extension
            _ext += '.all'
            files = merge_data_files(files)

        # group(1) is the flow tuple used by the source filter
        match_str = r'.*_([0-9\.]*_[0-9]*_[0-9\.]*_[0-9]*)[0-9a-z_.]*' + _ext
        for f in files:
            res = re.search(match_str, f)
            if res and sfil.is_in(res.group(1)):
                # only add file if enough data points
                rows = int(
                    local('wc -l %s | awk \'{ print $1 }\'' % f, capture=True))
                if rows > int(min_values):
                    out_files[res.group(1)] = f

        if len(out_files) < len(leg_names):
            abort(
                'No data files for some of the source filters for experiment %s'
                % experiment)

        sorted_files = sort_by_flowkeys(out_files, source_filter)

        for name, file_name in sorted_files:
            file_names.append(file_name)

    if group_by_prefix == '1':
        # group by test prefix (and flow)

        # first, get all test id prefixes
        test_id_pfxs = {}
        for experiment in fil_experiments:
            res = re.search(match_str2, experiment)
            if res:
                test_id_pfxs[res.group(1)] = 1

        # second, sort files so that same parameter combinations for different
        # prefixes are together
        # if we have multiple prefixes, create legend entry for each
        # prefix+flow combination
        _file_names = [''] * len(file_names)
        _leg_names = []
        pfx_cnt = len(test_id_pfxs)
        i = 0
        j = -1
        last_pfx = ''
        for name in file_names:
            # NOTE(review): curr_pfx is only assigned if a prefix matches;
            # a file name matching no prefix would raise NameError here —
            # presumably impossible since both are derived from
            # fil_experiments; verify if this path ever aborts
            for p in test_id_pfxs:
                if name.find(p) > -1:
                    curr_pfx = p
                    break

            if curr_pfx != last_pfx:
                i = 0
                j += 1
                for l in leg_names:
                    _leg_names.append(curr_pfx + '-' + l)

            _file_names[i * pfx_cnt + j] = name

            i += 1
            last_pfx = curr_pfx

        file_names = _file_names
        leg_names = _leg_names

        # remove duplicates in the x-axis labels
        xlabs = list(set(xlabs))

    # user-supplied legend names override the auto-generated ones
    if lnames != '':
        lnames_arr = lnames.split(';')
        if len(lnames_arr) != len(leg_names):
            abort(
                'Number of legend names must be equal to the number of source filters'
            )
        leg_names = lnames_arr

    # filter out unchanged variables in the x labels (need at least 2 labels)
    if omit_const_xlab_vars == '1' and len(xlabs) > 1:

        xlabs_arrs = {}
        xlabs_changed = {}

        # split each label into its newline-separated variable components
        for i in range(len(xlabs)):
            xlabs_arrs[i] = xlabs[i].split('\n')

        # a component is kept only if it differs between at least two labels
        for i in range(len(xlabs_arrs[0])):
            changed = False
            xlab_var = xlabs_arrs[0][i]
            for j in range(1, len(xlabs)):
                if xlabs_arrs[j][i] != xlab_var:
                    changed = True
                    break

            xlabs_changed[i] = changed

        for i in range(len(xlabs)):
            tmp = []
            for j in range(len(xlabs_arrs[i])):
                if xlabs_changed[j]:
                    tmp.append(xlabs_arrs[i][j].replace('_', ' ', 1))

            xlabs[i] = '\n'.join(tmp)

    print(leg_names)
    print(file_names)

    #
    # pass the data files and auxilary info to plot function
    #

    if out_name != '':
        oprefix = out_name + '_' + test_id_pfx + '_' + metric + '_' + ptype
    else:
        oprefix = test_id_pfx + '_' + metric + '_' + ptype
    title = oprefix

    plot_cmpexp(title, file_names, xlabs, ylab, yindex, yscaler, 'pdf',
                oprefix, pdf_dir, sep, aggr, diff, omit_const, ptype, ymin,
                ymax, leg_names, stime, etime, plot_params, plot_script)

    # done
    puts('\n[MAIN] COMPLETED analyse_cmpexp %s \n' % test_id_pfx)