Code example #1
0
File: generator_CPTA.py — Project: marekgr/csit
def _generate_trending_traces(in_data, job_name, build_info,
                              show_trend_line=True, name="", color=""):
    """Generate the trending traces:
     - samples,
     - outliers, regress, progress
     - average of normal samples (trending line)

    :param in_data: Full data set.
    :param job_name: The name of job which generated the data.
    :param build_info: Information about the builds.
    :param show_trend_line: Show moving median (trending plot).
    :param name: Name of the plot.
    :param color: Name of the color for the plot.
    :type in_data: OrderedDict
    :type job_name: str
    :type build_info: dict
    :type show_trend_line: bool
    :type name: str
    :type color: str
    :returns: Generated traces (list) and the evaluated result.
    :rtype: tuple(traces, result)
    """

    data_x = list(in_data.keys())
    data_y = list(in_data.values())

    hover_text = list()
    xaxis = list()
    for idx in data_x:
        # Compose hover text from the SUT build reference and the csit
        # build number; the period differs between dpdk and vpp jobs.
        if "dpdk" in job_name:
            hover_text.append("dpdk-ref: {0}<br>csit-ref: mrr-weekly-build-{1}".
                              format(build_info[job_name][str(idx)][1].
                                     rsplit('~', 1)[0], idx))
        elif "vpp" in job_name:
            hover_text.append("vpp-ref: {0}<br>csit-ref: mrr-daily-build-{1}".
                              format(build_info[job_name][str(idx)][1].
                                     rsplit('~', 1)[0], idx))
        # Build date is a fixed-layout string: YYYYMMDD at [0:8], then
        # hours at [9:11] and minutes at [12:].
        date = build_info[job_name][str(idx)][0]
        xaxis.append(datetime(int(date[0:4]), int(date[4:6]), int(date[6:8]),
                              int(date[9:11]), int(date[12:])))

    data_pd = pd.Series(data_y, index=xaxis)

    anomaly_classification, avgs = classify_anomalies(data_pd)

    # Collect anomaly points into plain lists and build the Series once at
    # the end: pd.Series.append() inside a loop is quadratic and the method
    # was deprecated and removed in pandas 2.0.
    anomaly_timestamps = list()
    anomaly_values = list()
    anomalies_colors = list()
    anomalies_avgs = list()
    # Position of each classification on the discrete colorscale below.
    anomaly_color = {
        "outlier": 0.0,
        "regression": 0.33,
        "normal": 0.66,
        "progression": 1.0
    }
    if anomaly_classification:
        for idx, (timestamp, value) in enumerate(data_pd.items()):
            if anomaly_classification[idx] in \
                    ("outlier", "regression", "progression"):
                anomaly_timestamps.append(timestamp)
                anomaly_values.append(value)
                anomalies_colors.append(
                    anomaly_color[anomaly_classification[idx]])
                anomalies_avgs.append(avgs[idx])
        # Extra color values (without matching points) force plotly to use
        # the full colorscale range even when not all classes occur.
        anomalies_colors.extend([0.0, 0.33, 0.66, 1.0])
    anomalies = pd.Series(anomaly_values, index=anomaly_timestamps)

    # Create traces

    trace_samples = plgo.Scatter(
        x=xaxis,
        y=data_y,
        mode='markers',
        line={
            "width": 1
        },
        legendgroup=name,
        name="{name}-thput".format(name=name),
        marker={
            "size": 5,
            "color": color,
            "symbol": "circle",
        },
        text=hover_text,
        hoverinfo="x+y+text+name"
    )
    traces = [trace_samples, ]

    if show_trend_line:
        trace_trend = plgo.Scatter(
            x=xaxis,
            y=avgs,
            mode='lines',
            line={
                "shape": "linear",
                "width": 1,
                "color": color,
            },
            legendgroup=name,
            name='{name}-trend'.format(name=name)
        )
        traces.append(trace_trend)

    trace_anomalies = plgo.Scatter(
        x=anomalies.keys(),
        y=anomalies_avgs,
        mode='markers',
        hoverinfo="none",
        showlegend=True,
        legendgroup=name,
        name="{name}-anomalies".format(name=name),
        marker={
            "size": 15,
            "symbol": "circle-open",
            "color": anomalies_colors,
            # Four flat bands: grey=outlier, red=regression, white=normal,
            # green=progression.
            "colorscale": [[0.00, "grey"],
                           [0.25, "grey"],
                           [0.25, "red"],
                           [0.50, "red"],
                           [0.50, "white"],
                           [0.75, "white"],
                           [0.75, "green"],
                           [1.00, "green"]],
            "showscale": True,
            "line": {
                "width": 2
            },
            "colorbar": {
                "y": 0.5,
                "len": 0.8,
                "title": "Circles Marking Data Classification",
                "titleside": 'right',
                "titlefont": {
                    "size": 14
                },
                "tickmode": 'array',
                "tickvals": [0.125, 0.375, 0.625, 0.875],
                "ticktext": ["Outlier", "Regression", "Normal", "Progression"],
                "ticks": "",
                "ticklen": 0,
                "tickangle": -90,
                "thickness": 10
            }
        }
    )
    traces.append(trace_anomalies)

    # The second return value is the classification of the newest sample,
    # or None when no classification is available.
    if anomaly_classification:
        return traces, anomaly_classification[-1]
    else:
        return traces, None
Code example #2
0
File: generator_tables.py — Project: preym17/csit
def table_performance_trending_dashboard(table, input_data):
    """Generate the table(s) with algorithm:
    table_performance_trending_dashboard
    specified in the specification file.

    :param table: Table to generate.
    :param input_data: Data to process.
    :type table: pandas.Series
    :type input_data: InputData
    """

    logging.info("  Generating the table {0} ...".format(table.get(
        "title", "")))

    # Transform the data
    logging.info("    Creating the data set for the {0} '{1}'.".format(
        table.get("type", ""), table.get("title", "")))
    data = input_data.filter_data(table, continue_on_error=True)

    # Prepare the header of the tables
    header = [
        "Test Case", "Trend [Mpps]", "Short-Term Change [%]",
        "Long-Term Change [%]", "Regressions [#]", "Progressions [#]"
    ]
    header_str = ",".join(header) + "\n"

    # Prepare data to the table: collect per-test receive rates, keyed by
    # build number, across all configured jobs/builds.
    tbl_dict = dict()
    for job, builds in table["data"].items():
        for build in builds:
            for tst_name, tst_data in data[job][str(build)].iteritems():
                if tst_name.lower() in table["ignore-list"]:
                    continue
                if tbl_dict.get(tst_name, None) is None:
                    # The NIC name is not part of the test name; recover it
                    # from the parent suite name.
                    groups = re.search(REGEX_NIC, tst_data["parent"])
                    if not groups:
                        continue
                    nic = groups.group(0)
                    tbl_dict[tst_name] = {
                        "name": "{0}-{1}".format(nic, tst_data["name"]),
                        "data": OrderedDict()
                    }
                try:
                    tbl_dict[tst_name]["data"][str(build)] = \
                        tst_data["result"]["receive-rate"]
                except (TypeError, KeyError):
                    pass  # No data in output.xml for this test

    tbl_lst = list()
    for tst_name, tst_entry in tbl_dict.items():
        data_t = tst_entry["data"]
        # At least two samples are needed to classify a trend.
        if len(data_t) < 2:
            continue

        classification_lst, avgs = classify_anomalies(data_t)

        win_size = min(len(data_t), table["window"])
        long_win_size = min(len(data_t), table["long-trend-window"])

        # Maximum long-term average, excluding the short-term window and
        # any NaN placeholders; NaN when no valid sample remains.
        try:
            max_long_avg = max(
                [x for x in avgs[-long_win_size:-win_size] if not isnan(x)])
        except ValueError:
            max_long_avg = nan
        last_avg = avgs[-1]
        avg_week_ago = avgs[max(-win_size, -len(avgs))]

        if isnan(last_avg) or isnan(avg_week_ago) or avg_week_ago == 0.0:
            rel_change_last = nan
        else:
            rel_change_last = round(
                ((last_avg - avg_week_ago) / avg_week_ago) * 100, 2)

        if isnan(max_long_avg) or isnan(last_avg) or max_long_avg == 0.0:
            rel_change_long = nan
        else:
            rel_change_long = round(
                ((last_avg - max_long_avg) / max_long_avg) * 100, 2)

        if classification_lst:
            # Skip tests with incomplete data. This single OR-check subsumes
            # the former separate "both changes are NaN" test, which was
            # dead code.
            if (isnan(last_avg) or isnan(rel_change_last)
                    or isnan(rel_change_long)):
                continue
            tbl_lst.append([
                tst_entry["name"],
                round(last_avg / 1000000, 2), rel_change_last, rel_change_long,
                classification_lst[-win_size:].count("regression"),
                classification_lst[-win_size:].count("progression")
            ])

    # Sort: alphabetically first, then stably re-bucket by regression count
    # (descending), progression count (descending), short-term change.
    tbl_lst.sort(key=lambda rel: rel[0])

    tbl_sorted = list()
    for nrr in range(table["window"], -1, -1):
        tbl_reg = [item for item in tbl_lst if item[4] == nrr]
        for nrp in range(table["window"], -1, -1):
            tbl_out = [item for item in tbl_reg if item[5] == nrp]
            tbl_out.sort(key=lambda rel: rel[2])
            tbl_sorted.extend(tbl_out)

    file_name = "{0}{1}".format(table["output-file"], table["output-file-ext"])

    logging.info("    Writing file: '{0}'".format(file_name))
    with open(file_name, "w") as file_handler:
        file_handler.write(header_str)
        for test in tbl_sorted:
            file_handler.write(",".join([str(item) for item in test]) + '\n')

    txt_file_name = "{0}.txt".format(table["output-file"])
    logging.info("    Writing file: '{0}'".format(txt_file_name))
    convert_csv_to_pretty_txt(file_name, txt_file_name)
Code example #3
0
def _generate_trending_traces(in_data,
                              job_name,
                              build_info,
                              show_trend_line=True,
                              name="",
                              color=""):
    """Generate the trending traces:
     - samples,
     - outliers, regress, progress
     - average of normal samples (trending line)

    :param in_data: Full data set.
    :param job_name: The name of job which generated the data.
    :param build_info: Information about the builds.
    :param show_trend_line: Show moving median (trending plot).
    :param name: Name of the plot.
    :param color: Name of the color for the plot.
    :type in_data: OrderedDict
    :type job_name: str
    :type build_info: dict
    :type show_trend_line: bool
    :type name: str
    :type color: str
    :returns: Generated traces (list) and the evaluated result.
    :rtype: tuple(traces, result)
    """

    data_x = list(in_data.keys())
    data_y = list(in_data.values())

    hover_text = list()
    xaxis = list()
    for idx in data_x:
        # Build date is a fixed-layout string: YYYYMMDD at [0:8], then
        # hours at [9:11] and minutes at [12:].
        date = build_info[job_name][str(idx)][0]
        hover_str = ("date: {date}<br>"
                     "value: {value:,}<br>"
                     "{sut}-ref: {build}<br>"
                     "csit-ref: mrr-{period}-build-{build_nr}<br>"
                     "testbed: {testbed}")
        if "dpdk" in job_name:
            hover_text.append(
                hover_str.format(
                    date=date,
                    value=int(in_data[idx].avg),
                    sut="dpdk",
                    build=build_info[job_name][str(idx)][1].rsplit('~', 1)[0],
                    period="weekly",
                    build_nr=idx,
                    testbed=build_info[job_name][str(idx)][2]))
        elif "vpp" in job_name:
            hover_text.append(
                hover_str.format(
                    date=date,
                    value=int(in_data[idx].avg),
                    sut="vpp",
                    build=build_info[job_name][str(idx)][1].rsplit('~', 1)[0],
                    period="daily",
                    build_nr=idx,
                    testbed=build_info[job_name][str(idx)][2]))

        xaxis.append(
            datetime(int(date[0:4]), int(date[4:6]), int(date[6:8]),
                     int(date[9:11]), int(date[12:])))

    data_pd = OrderedDict()
    for key, value in zip(xaxis, data_y):
        data_pd[key] = value

    anomaly_classification, avgs = classify_anomalies(data_pd)

    anomalies = OrderedDict()
    anomalies_colors = list()
    anomalies_avgs = list()
    # Position of each classification on the discrete colorscale below.
    # "outlier" has no dedicated band on this three-class colorbar; map it
    # to the neutral (white) band. Previously the key was missing entirely,
    # which raised KeyError whenever the classifier reported an outlier.
    anomaly_color = {
        "outlier": 0.5,
        "regression": 0.0,
        "normal": 0.5,
        "progression": 1.0
    }
    if anomaly_classification:
        # .items() behaves identically on Python 2 and 3, unlike iteritems().
        for idx, (key, value) in enumerate(data_pd.items()):
            if anomaly_classification[idx] in \
                    ("outlier", "regression", "progression"):
                anomalies[key] = value
                anomalies_colors.append(
                    anomaly_color[anomaly_classification[idx]])
                anomalies_avgs.append(avgs[idx])
        # Extra color values (without matching points) force plotly to use
        # the full colorscale range even when not all classes occur.
        anomalies_colors.extend([0.0, 0.5, 1.0])

    # Create traces

    trace_samples = plgo.Scatter(x=xaxis,
                                 y=[y.avg for y in data_y],
                                 mode='markers',
                                 line={"width": 1},
                                 showlegend=True,
                                 legendgroup=name,
                                 name="{name}".format(name=name),
                                 marker={
                                     "size": 5,
                                     "color": color,
                                     "symbol": "circle",
                                 },
                                 text=hover_text,
                                 hoverinfo="text")
    traces = [
        trace_samples,
    ]

    if show_trend_line:
        trace_trend = plgo.Scatter(
            x=xaxis,
            y=avgs,
            mode='lines',
            line={
                "shape": "linear",
                "width": 1,
                "color": color,
            },
            showlegend=False,
            legendgroup=name,
            name='{name}'.format(name=name),
            text=["trend: {0:,}".format(int(avg)) for avg in avgs],
            hoverinfo="text+name")
        traces.append(trace_trend)

    trace_anomalies = plgo.Scatter(
        x=anomalies.keys(),
        y=anomalies_avgs,
        mode='markers',
        hoverinfo="none",
        showlegend=False,
        legendgroup=name,
        name="{name}-anomalies".format(name=name),
        marker={
            "size":
            15,
            "symbol":
            "circle-open",
            "color":
            anomalies_colors,
            # Three flat bands: red=regression, white=normal/outlier,
            # green=progression.
            "colorscale": [[0.00, "red"], [0.33, "red"], [0.33, "white"],
                           [0.66, "white"], [0.66, "green"], [1.00, "green"]],
            "showscale":
            True,
            "line": {
                "width": 2
            },
            "colorbar": {
                "y": 0.5,
                "len": 0.8,
                "title": "Circles Marking Data Classification",
                "titleside": 'right',
                "titlefont": {
                    "size": 14
                },
                "tickmode": 'array',
                "tickvals": [0.167, 0.500, 0.833],
                "ticktext": ["Regression", "Normal", "Progression"],
                "ticks": "",
                "ticklen": 0,
                "tickangle": -90,
                "thickness": 10
            }
        })
    traces.append(trace_anomalies)

    # The second return value is the classification of the newest sample,
    # or None when no classification is available.
    if anomaly_classification:
        return traces, anomaly_classification[-1]
    else:
        return traces, None
Code example #4
0
File: generator_tables.py — Project: marekgr/csit
def table_performance_trending_dashboard(table, input_data):
    """Generate the table(s) with algorithm: table_performance_comparison
    specified in the specification file.

    :param table: Table to generate.
    :param input_data: Data to process.
    :type table: pandas.Series
    :type input_data: InputData
    """

    logging.info("  Generating the table {0} ...".format(table.get(
        "title", "")))

    # Transform the data
    logging.info("    Creating the data set for the {0} '{1}'.".format(
        table.get("type", ""), table.get("title", "")))
    data = input_data.filter_data(table, continue_on_error=True)

    # Prepare the header of the tables
    header = [
        "Test Case", "Trend [Mpps]", "Short-Term Change [%]",
        "Long-Term Change [%]", "Regressions [#]", "Progressions [#]",
        "Outliers [#]"
    ]
    header_str = ",".join(header) + "\n"

    # Prepare data to the table: collect per-test throughput, keyed by
    # build number, across all configured jobs/builds.
    tbl_dict = dict()
    for job, builds in table["data"].items():
        for build in builds:
            for tst_name, tst_data in data[job][str(build)].iteritems():
                if tst_name.lower() in table["ignore-list"]:
                    continue
                if tbl_dict.get(tst_name, None) is None:
                    name = "{0}-{1}".format(
                        tst_data["parent"].split("-")[0],
                        "-".join(tst_data["name"].split("-")[1:]))
                    tbl_dict[tst_name] = {"name": name, "data": OrderedDict()}
                try:
                    tbl_dict[tst_name]["data"][str(build)] = \
                        tst_data["result"]["throughput"]
                except (TypeError, KeyError):
                    pass  # No data in output.xml for this test

    tbl_lst = list()
    for tst_name in tbl_dict.keys():
        # At least three samples are needed for outlier splitting.
        if len(tbl_dict[tst_name]["data"]) < 3:
            continue

        pd_data = pd.Series(tbl_dict[tst_name]["data"])
        data_t, _ = split_outliers(pd_data,
                                   outlier_const=1.5,
                                   window=table["window"])
        last_key = data_t.keys()[-1]
        win_size = min(data_t.size, table["window"])
        win_first_idx = data_t.size - win_size
        key_14 = data_t.keys()[win_first_idx]
        long_win_size = min(data_t.size, table["long-trend-window"])
        median_t = data_t.rolling(window=win_size, min_periods=2).median()
        median_first_idx = median_t.size - long_win_size
        # Maximum long-term median, excluding the short-term window and any
        # NaN placeholders; NaN when no valid sample remains.
        try:
            max_median = max([
                x for x in median_t.values[median_first_idx:-win_size]
                if not isnan(x)
            ])
        except ValueError:
            max_median = nan
        try:
            last_median_t = median_t[last_key]
        except KeyError:
            last_median_t = nan
        try:
            median_t_14 = median_t[key_14]
        except KeyError:
            median_t_14 = nan

        if isnan(last_median_t) or isnan(median_t_14) or median_t_14 == 0.0:
            rel_change_last = nan
        else:
            rel_change_last = round(
                ((last_median_t - median_t_14) / median_t_14) * 100, 2)

        if isnan(max_median) or isnan(last_median_t) or max_median == 0.0:
            rel_change_long = nan
        else:
            rel_change_long = round(
                ((last_median_t - max_median) / max_median) * 100, 2)

        # Classification list:
        classification_lst = classify_anomalies(data_t, window=14)

        if classification_lst:
            if isnan(rel_change_last) and isnan(rel_change_long):
                continue
            tbl_lst.append([
                tbl_dict[tst_name]["name"],
                '-' if isnan(last_median_t) else round(last_median_t /
                                                       1000000, 2),
                '-' if isnan(rel_change_last) else rel_change_last,
                '-' if isnan(rel_change_long) else rel_change_long,
                classification_lst[win_first_idx:].count("regression"),
                classification_lst[win_first_idx:].count("progression"),
                classification_lst[win_first_idx:].count("outlier")
            ])

    # Sort: alphabetically first, then stably re-bucket by regression,
    # progression and outlier counts (descending), then short-term change.
    tbl_lst.sort(key=lambda rel: rel[0])

    tbl_sorted = list()
    for nrr in range(table["window"], -1, -1):
        tbl_reg = [item for item in tbl_lst if item[4] == nrr]
        for nrp in range(table["window"], -1, -1):
            tbl_pro = [item for item in tbl_reg if item[5] == nrp]
            for nro in range(table["window"], -1, -1):
                tbl_out = [item for item in tbl_pro if item[6] == nro]
                tbl_out.sort(key=lambda rel: rel[2])
                tbl_sorted.extend(tbl_out)

    file_name = "{0}{1}".format(table["output-file"], table["output-file-ext"])

    logging.info("    Writing file: '{0}'".format(file_name))
    with open(file_name, "w") as file_handler:
        file_handler.write(header_str)
        for test in tbl_sorted:
            file_handler.write(",".join([str(item) for item in test]) + '\n')

    txt_file_name = "{0}.txt".format(table["output-file"])
    txt_table = None
    logging.info("    Writing file: '{0}'".format(txt_file_name))
    # 'rb' is the Python 2 csv-module convention; the header row written
    # above guarantees txt_table is initialized before align is set.
    with open(file_name, 'rb') as csv_file:
        csv_content = csv.reader(csv_file, delimiter=',', quotechar='"')
        for row in csv_content:
            if txt_table is None:
                txt_table = prettytable.PrettyTable(row)
            else:
                txt_table.add_row(row)
        # The key must match the header column exactly ("Test Case");
        # the former "Test case" key silently applied no alignment.
        txt_table.align["Test Case"] = "l"
    with open(txt_file_name, "w") as txt_file:
        txt_file.write(str(txt_table))