예제 #1
0
def _2d_var_detection(data, var, dvar, window, dist, thres, min_levels, verbose=0):
    """ Detect Breakpoints from two variables

    1. SNHT per level (both variables), stored as '<var>_snht' / '<dvar>_snht'
    2. Local maxima of the SNHT of `var` summed over axis 1
    3. Keep maxima where at least `min_levels` levels reach `thres`
    4. Write 1 / 2 into '<var>_breaks' at the kept maxima
       (2 where the level itself exceeds `thres`, 1 otherwise)

    Only `var` drives the detection so far; the SNHT of `dvar` is stored but
    not yet combined with it (see TODOs below).

    Parameters
    ----------
    data        container indexed by variable name, modified in place
                NOTE(review): `data.items` and the 3-axis `iloc` suggest a
                pandas Panel (vars, time, p) -- confirm against the caller
    var         name of the variable in which breaks are detected
    dvar        name of the second variable (only its SNHT is stored)
    window      window passed to snht (snht also receives window / 4)
    dist        forwarded to local_maxima as `dist`
    thres       SNHT threshold
    min_levels  minimum number of levels at or above `thres` for a break
    verbose     > 0 prints the number of local maxima

    Returns
    -------
    `data` if no local maxima were found, otherwise None.
    (Kept as in the original implementation; results are written into `data`.)
    """
    # TODO 2d_var_detection per level
    stest1 = np.squeeze(np.apply_along_axis(snht, 0, data[var].values, window, window / 4))
    stest2 = np.squeeze(np.apply_along_axis(snht, 0, data[dvar].values, window, window / 4))
    data['%s_snht' % var] = stest1
    data['%s_snht' % dvar] = stest2
    data['%s_breaks' % var] = 0

    imax1 = np.asarray(local_maxima(np.sum(stest1, 1), dist=dist))  # for all levels
    # TODO how to combine !? (local maxima of stest2 are not evaluated yet)
    # TODO add => summarized peak
    # TODO are there sign. breaks in the other variable?
    if len(imax1) == 0:
        print("No Breakpoints detected: %s min_levels (%d) found: %f" % (var, min_levels, np.max(stest1)))
        return data

    if verbose > 0:
        print("Local Maxima (%s): %d" % (var, len(imax1)))

    # how many above threshold
    tstat = np.sum(stest1[imax1, :] >= thres, 1)
    # One mask for both the report and the selection, so the printed count
    # always matches the breaks that are actually written.
    is_break = tstat >= min_levels
    if not np.any(is_break):
        print("No Breakpoints detected: %s min_levels (%d) found: %d" % (var, min_levels, np.max(tstat)))
        return  # nothing to write, '<var>_breaks' stays 0

    print("Final Breaks (%s): %d" % (var, np.sum(is_break)))

    itx = imax1[is_break]  # indices of breaks
    ivar = data.items.get_loc("%s_breaks" % var)  # index of variable
    # NOTE(review): levels are counted with >= thres above but flagged with
    # > thres here -- confirm whether the strict comparison is intended.
    iarray = 1 + np.int_(stest1[itx, :] > thres)
    data.iloc[ivar, itx, :] = iarray[np.newaxis, ::]  # (1,...) is required to set
    return
예제 #2
0
def _2d_detection(data, var, window, dist, thres, min_levels, verbose=0):
    """ Detect Breakpoints in 2D (time x pressure levels)

    Steps
    1. SNHT for every level, stored as '<var>_snht'
    2. Candidate peaks: local maxima of the SNHT summed over axis 1
    3. A candidate is a break if at least `min_levels` levels exceed `thres`
    4. '<var>_breaks' is set to 0 everywhere, then to 1 / 2 at the breaks
       (2 for levels exceeding `thres`, 1 for the remaining levels)

    Parameters
    ----------
    data        Panel       Inputdata (vars, time, p), modified in place
    var         str         Variable to consider
    window      float       Window size of Detection
    dist        float       distance between breakpoints
    thres       int         SNHT threshold
    min_levels  int         Minimum required levels to detect breakpoint
    verbose     int         forwarded to print_verbose; > 1 lists every peak

    Returns
    -------
    bool : True if at least one break was written, False otherwise

    """
    level_stat = np.squeeze(np.apply_along_axis(snht, 0, data[var].values, window, window / 4))  # per level
    data['%s_snht' % var] = level_stat
    data['%s_breaks' % var] = 0

    # candidate peaks over all levels
    peaks = np.asarray(local_maxima(np.sum(level_stat, axis=1), dist=dist))
    if len(peaks) == 0:
        message = "No Breakpoints detected: %s min_levels (%d) found: %f (%f)" % (
            var, min_levels, np.max(level_stat), thres)
        print_verbose(message, verbose)
        return False

    print_verbose("Local Maxima (%s): %d" % (var, len(peaks)), verbose)

    # per peak: which levels exceed the threshold, and how many of them
    exceeds = level_stat[peaks, :] > thres  # could weight levels ? upper levels are less relevant?
    n_levels = np.sum(exceeds, axis=1)
    accepted = n_levels >= min_levels
    if not np.any(accepted):
        message = "No Breakpoints detected: %s min_levels (%d) found: %d" % (var, min_levels, np.max(n_levels))
        print_verbose(message, verbose)
        return False

    if verbose > 1:
        print("Breaks (%s): " % var)
        for ipeak, count in zip(peaks, n_levels):
            print("%s (%d) %s" % (str(data.major_axis[ipeak]), count, color_boolean(count >= min_levels)))

    print_verbose("Final Breaks (%s): %d" % (var, np.sum(accepted)), verbose)

    break_idx = peaks[accepted]  # positions of the breaks
    item_idx = data.items.get_loc("%s_breaks" % var)  # position of the breaks variable
    flags = np.int_(exceeds[accepted]) + 1
    data.iloc[item_idx, break_idx, :] = flags[np.newaxis, ...]  # leading axis is required to set
    return True
예제 #3
0
def _1d_detection(data, var, window, dist, thres, verbose=0):
    """ Detect Breakpoints in 1D (single series)

    1. SNHT of data[var], stored as '<var>_snht'
    2. Local maxima of the SNHT (found via local_maxima with `dist`)
    3. Maxima exceeding `thres` are flagged with 2 in '<var>_breaks'

    Parameters
    ----------
    data        table indexed by column name, modified in place
                NOTE(review): `data.columns` / 2-axis `iloc` suggest a pandas
                DataFrame -- confirm against the caller
    var         name of the column to test
    window      window passed to snht (snht also receives window / 4)
    dist        forwarded to local_maxima as `dist`
    thres       SNHT threshold
    verbose     forwarded to print_verbose

    Returns
    -------
    bool : True if at least one break was flagged, False otherwise
    """
    stest = snht(data[var].values, window, window / 4)
    data['%s_snht' % var] = stest
    # Initialise up front (as _2d_detection does) so the column exists and
    # holds no stale breaks from an earlier run when nothing is detected.
    data['%s_breaks' % var] = 0

    # could be too close together
    imax = np.asarray(local_maxima(stest, dist=dist))  # local maxima within a certain distance!
    if len(imax) == 0:
        print_verbose("No Breakpoints detected: %s found: %f (%f)" % (var, np.max(stest), thres), verbose)
        return False

    print_verbose("Local Maxima (%s): %d" % (var, len(imax)), verbose)
    tstat = stest[imax] > thres  # boolean mask: maxima above threshold
    if not np.any(tstat):
        print_verbose("No Breakpoints detected: %s" % (var), verbose)
        return False

    print_verbose("Final Breaks (%s): %d" % (var, np.sum(tstat)), verbose)

    ivar = data.columns.get_loc('%s_breaks' % var)
    data.iloc[imax[tstat], ivar] = 2
    return True
예제 #4
0
def detection_wrapper(arg):
    """ Run the 2D break detection on the module-level `testdata` for one parameter set

    Parameters
    ----------
    arg     sequence of exactly four values: (window, dist, thres, min_levels)

    Returns
    -------
    params  dict with the keys window, dist, thres, level (= min_levels),
            nbreaks (number of breaks, 0 if none) and
            vert (median level index above `thres` at the breaks, -1 if none)
    breaks  list of `testdata.index` entries at the breaks ([] if none)
    """
    from snht import snht
    from support_functions import local_maxima

    global testdata
    # data needs to be shared too all processes ?
    window, dist, thres, min_levels = arg
    params = dict(window=window, dist=dist, thres=thres, level=min_levels, nbreaks=0, vert=-1)

    # SNHT per level, then candidate peaks over all levels
    level_stat = np.squeeze(np.apply_along_axis(snht, 0, testdata.values, window, window / 4))
    peaks = np.asarray(local_maxima(np.sum(level_stat, axis=1), dist=dist))
    if len(peaks) == 0:
        return params, []  # no local maxima

    n_levels = np.sum(level_stat[peaks, :] > thres, axis=1)
    accepted = n_levels >= min_levels
    if not np.any(accepted):
        return params, []  # not enough above threshold

    break_idx = peaks[accepted]  # positions of the breaks
    break_labels = testdata.index[break_idx].tolist()
    # level indices above threshold -> median as vertical position
    level_idx = np.where(level_stat[break_idx] > thres)[1]
    params["nbreaks"] = len(break_idx)
    params["vert"] = np.median(level_idx)
    return params, break_labels