Ejemplo n.º 1
0
def _calc_box(srs: dd.Series, qntls: da.Array, cfg: Config) -> Dict[str, Any]:
    """
    Assemble the statistics needed to draw a box plot of one column.

    Parameters
    ----------
    srs
        the column to summarise
    qntls
        quantiles of the column, looked up with ``.loc`` at 0.25, 0.5 and 0.75
        (NOTE(review): annotated as ``da.Array`` but indexed via ``.loc`` --
        presumably a Series keyed by quantile level; confirm against caller)
    cfg
        configuration object; ``cfg.insight.enable`` decides whether the
        outlier count is added to the result

    Returns
    -------
    Dict[str, Any]
        ``qrtl1``/``qrtl2``/``qrtl3`` (quartiles), ``lw``/``uw`` (lower and
        upper whiskers), ``otlrs`` (values of the sampled outliers) and, only
        when ``cfg.insight.enable`` is truthy, ``notlrs`` (``shape[0]`` of the
        full, unsampled outlier selection)
    """
    data: Dict[str, Any] = {}

    # the three quartiles are stored under qrtl1 .. qrtl3
    for idx, level in enumerate((0.25, 0.5, 0.75), start=1):
        data[f"qrtl{idx}"] = qntls.loc[level].sum()

    # fences sit 1.5 inter-quartile ranges outside the first/third quartile
    iqr = data["qrtl3"] - data["qrtl1"]
    lower_fence = data["qrtl1"] - 1.5 * iqr
    upper_fence = data["qrtl3"] + 1.5 * iqr
    within_fences = srs.between(lower_fence, upper_fence)

    # whiskers: extreme values among the points that lie within the fences
    inliers = srs[within_fences]
    data["lw"] = inliers.min()
    data["uw"] = inliers.max()

    # outliers: everything that is not within the fences
    outliers = srs[~within_fences]

    def _sample_partition(part):
        # keep at most 100 randomly chosen outliers of this partition
        return part.sample(min(100, part.shape[0]))

    sampled = outliers.map_partitions(_sample_partition, meta=outliers)
    data["otlrs"] = sampled.values

    if cfg.insight.enable:
        # the count refers to all outliers, not just the sampled ones
        data["notlrs"] = outliers.shape[0]

    return data
Ejemplo n.º 2
0
def calc_box(srs: dd.Series, qntls: da.Array) -> Dict[str, Any]:
    """
    Compute the values required to render a box plot.

    Parameters
    ----------
    srs
        one numerical column
    qntls
        quantiles of the column, read with ``.loc`` at 0.25, 0.5 and 0.75
        (NOTE(review): annotated as ``da.Array`` but indexed via ``.loc`` --
        presumably a Series keyed by quantile level; confirm against caller)

    Returns
    -------
    Dict[str, Any]
        ``qrtl1``/``qrtl2``/``qrtl3`` (quartiles), ``lw``/``uw`` (lower and
        upper whiskers), ``otlrs`` (values of the sampled outliers) and
        ``notlrs`` (``shape[0]`` of the full, unsampled outlier selection)
    """
    # quartiles
    data: Dict[str, Any] = {
        "qrtl1": qntls.loc[0.25].sum(),
        "qrtl2": qntls.loc[0.5].sum(),
        "qrtl3": qntls.loc[0.75].sum(),
    }

    # fences sit 1.5 inter-quartile ranges outside the first/third quartile
    iqr = data["qrtl3"] - data["qrtl1"]
    lower_fence = data["qrtl1"] - 1.5 * iqr
    upper_fence = data["qrtl3"] + 1.5 * iqr
    within_fences = srs.between(lower_fence, upper_fence)

    inliers = srs[within_fences]
    outliers = srs[~within_fences]

    def _sample_partition(part):
        # keep at most 100 randomly chosen outliers of this partition
        return part.sample(min(100, part.shape[0]))

    sampled = outliers.map_partitions(_sample_partition, meta=outliers)

    # whiskers: extreme values among the points that lie within the fences
    data["lw"] = inliers.min()
    data["uw"] = inliers.max()
    data["otlrs"] = sampled.values
    # the outlier count is always reported here; there is no config switch
    data["notlrs"] = outliers.shape[0]

    return data