Esempio n. 1
0
def get_results(filter_expression="", row_types=None, omit_unused_columns=True, start_time=-inf, end_time=inf):
    """
    Returns a filtered set of results and metadata in CSV-like format.
    The items can be any type, even mixed together in a single `DataFrame`.
    They are selected from the complete set of data referenced by the analysis file (`.anf`),
    including only those for which the given `filter_expression` evaluates to `True`.

    This function is a thin wrapper: all arguments are forwarded by keyword to
    `impl.get_results`, which does the actual work.

    # Parameters

    - **filter_expression** *(string)*: The filter expression to select the desired items. Example: `module =~ "*host*" AND name =~ "numPacket*"`
    - **row_types**: Optional. When given, filters the returned rows by type. Should be a unique list, containing any number of these strings:
      `"runattr"`, `"itervar"`, `"config"`, `"scalar"`, `"vector"`, `"statistic"`, `"histogram"`, `"param"`, `"attr"`.
      When omitted (or `None`), all of the above row types are requested.
    - **omit_unused_columns** *(bool)*: Optional. If `True`, all columns that would only contain `None` are removed from the returned DataFrame
    - **start_time**, **end_time** *(double)*: Optional time limits to trim the data of vector type results.
      The unit is seconds, both the `vectime` and `vecvalue` arrays will be affected, the interval is left-closed, right-open.

    # Columns of the returned DataFrame

    - **runID** *(string)*:  Identifies the simulation run
    - **type** *(string)*: Row type, one of the following: runattr, itervar, config, scalar, vector, statistic, histogram, param, attr
    - **module** *(string)*: Hierarchical name (a.k.a. full path) of the module that recorded the result item
    - **name** *(string)*: Name of the result item (scalar, statistic, histogram or vector)
    - **attrname** *(string)*: Name of the run attribute or result item attribute (in the latter case, the module and name columns identify the result item the attribute belongs to)
    - **attrvalue** *(string)*: Value of run and result item attributes, iteration variables, saved ini param settings (runattr, attr, itervar, param)
    - **value** *(double or string)*:  Output scalar or attribute value
    - **count**, **sumweights**, **mean**, **min**, **max**, **stddev** *(double)*: Fields of the statistics or histogram
    - **binedges**, **binvalues** *(np.array)*: Histogram bin edges and bin values, as space-separated lists. `len(binedges)==len(binvalues)+1`
    - **underflows**, **overflows** *(double)*: Sum of weights (or counts) of underflown and overflown samples of histograms
    - **vectime**, **vecvalue** *(np.array)*: Output vector time and value arrays, as space-separated lists
    """
    if row_types is None:
        # Build a fresh list on every call: a list literal in the signature
        # would be a single object shared by all calls, so any mutation made
        # downstream would leak into later calls.
        row_types = ['runattr', 'itervar', 'config', 'scalar', 'vector', 'statistic', 'histogram', 'param', 'attr']

    # Forward explicitly instead of via **locals(), so that adding a local
    # variable here can never turn into an unexpected keyword argument.
    return impl.get_results(
        filter_expression=filter_expression,
        row_types=row_types,
        omit_unused_columns=omit_unused_columns,
        start_time=start_time,
        end_time=end_time,
    )
Esempio n. 2
0
def get_results(filter_or_dataframe="",
                row_types=None,
                omit_unused_columns=True,
                include_fields_as_scalars=False,
                start_time=-inf,
                end_time=inf):
    """
    Returns a filtered set of results and metadata in a Pandas `DataFrame`. The
    items can be any type, even mixed together in a single `DataFrame`. They are
    selected from the complete set of data referenced by the analysis file
    (`.anf`), including only those for which the given `filter_or_dataframe`
    evaluates to `True`.

    When a string is given, the call is forwarded to `impl.get_results`. When a
    dataframe is given, the filtering and trimming is done locally on a new
    frame; the dataframe passed in is never modified.

    Parameters:

    - `filter_or_dataframe` (string or dataframe): The filter expression to
      select the desired items from the inputs, or a dataframe in the "raw"
      format. Example: `module =~ "*host*" AND name =~ "numPacket*"`
    - `row_types`: Optional. When given, filters the returned rows by type.
      Should be a unique list, containing any number of these strings:
      `"runattr"`, `"itervar"`, `"config"`, `"scalar"`, `"vector"`,
      `"statistic"`, `"histogram"`, `"param"`, `"attr"`
    - `omit_unused_columns` (bool): Optional. If `True`, all columns that would
      only contain `None` are removed from the returned DataFrame
    - `include_fields_as_scalars` (bool): Optional. If `True`, the fields of
      statistics and histograms (`:min`, `:mean`, etc.) are also returned as
      synthetic scalars. Only supported with a filter expression.
    - `start_time`, `end_time` (double): Optional time limits to trim the data
      of vector type results. The unit is seconds, both the `vectime` and
      `vecvalue` arrays will be affected, the interval is left-closed,
      right-open. Rows whose `vectime`/`vecvalue` cell does not hold an array
      (i.e. non-vector rows) are left unchanged.

    Returns: a `DataFrame` in the "raw" format (see the corresponding section of
    the module documentation for details). Its index is always renumbered
    from zero.

    Raises: `ValueError` if `include_fields_as_scalars` is set while
    `filter_or_dataframe` is a dataframe.
    """
    if isinstance(filter_or_dataframe, str):
        # Forward explicitly rather than through **locals(), so the set of
        # keyword arguments does not depend on which locals happen to exist.
        return impl.get_results(
            filter_expression=filter_or_dataframe,
            row_types=row_types,
            omit_unused_columns=omit_unused_columns,
            include_fields_as_scalars=include_fields_as_scalars,
            start_time=start_time,
            end_time=end_time,
        )

    if include_fields_as_scalars:
        raise ValueError(
            "include_fields_as_scalars is not supported when filter_or_dataframe is a dataframe"
        )

    df = filter_or_dataframe
    if row_types is not None:
        df = df[df["type"].isin(row_types)]

    if omit_unused_columns:
        df = df.dropna(axis='columns', how='all')

    # Not in place: when neither filter above produced a new frame, `df` is
    # still the caller's object and must not have its index rewritten.
    df = df.reset_index(drop=True)

    if start_time == -inf and end_time == inf:
        return df

    # Without both vector columns there is nothing to trim (e.g. the frame
    # holds no vectors and the all-empty columns were dropped above).
    if "vectime" not in df.columns or "vecvalue" not in df.columns:
        return df

    # Object arrays filled element by element, so every cell keeps its own
    # (possibly differently sized) array.
    cropped_times = np.empty(len(df), dtype=object)
    cropped_values = np.empty(len(df), dtype=object)

    for i, (t, v) in enumerate(zip(df["vectime"], df["vecvalue"])):
        if np.ndim(t) == 0 or np.ndim(v) == 0:
            # Scalar cell (None/NaN placeholder of a non-vector row): keep as is.
            cropped_times[i] = t
            cropped_values[i] = v
            continue

        # 'left' on both ends yields the half-open interval [start_time, end_time).
        from_index = np.searchsorted(t, start_time, 'left')
        to_index = np.searchsorted(t, end_time, 'left')

        cropped_times[i] = t[from_index:to_index]
        cropped_values[i] = v[from_index:to_index]

    # assign() returns a new frame; only the two vector columns are replaced.
    return df.assign(vectime=cropped_times, vecvalue=cropped_values)