Example #1
0
def get_total_order(name, curve, eps):
    '''
    Collect the labeled minimum and maximum intervals of one time series and
    the ordering edges between them.

    Intervals for minima come from the sublevel sets of the normalized curve;
    intervals for maxima come from the sublevel sets of the normalized,
    reflected curve.
    :param name: A string uniquely identifying the time series (curve).
    :param curve: curve object providing normalize() and normalize_reflect()
    :param eps: float threshold (noise level) with 0 < eps < 1.
    :return: tuple (nodes, edges). nodes is the result of annihilate() applied
             to the sorted list of (interval, (name, "min" or "max")) entries;
             edges is a list of index pairs (i, j) into nodes.
    :raises ValueError: if two consecutive nodes carry the same label.
    '''
    normalized = curve.normalize()
    reflected = curve.normalize_reflect()
    min_tree = tmt.births_only(normalized)
    max_tree = tmt.births_only(reflected)
    min_intervals = ss.get_sublevel_sets(min_tree, normalized, eps)
    max_intervals = ss.get_sublevel_sets(max_tree, reflected, eps)

    # Label every interval with the series name and its extremum type, then
    # order all of them together.
    candidates = [(interval, (name, "min")) for interval in min_intervals.values()]
    candidates.extend((interval, (name, "max")) for interval in max_intervals.values())
    candidates.sort()

    # When eps is close to (b-a)/2 for max b and min a, a min interval and a
    # max interval can be identical; annihilate() deals with those.
    nodes = annihilate(candidates)

    # Consecutive labels must alternate between "min" and "max".
    labels = [node[1] for node in nodes]
    for previous, current in zip(labels, labels[1:]):
        if previous == current:
            # Should never happen. If it does, a bug fix is required.
            raise ValueError("Two minima or two maxima in a row: {}.".format(nodes))

    # Within-series edges: i -> j whenever interval i has the smaller left
    # endpoint or the smaller right endpoint.
    edges = []
    for i, first in enumerate(nodes):
        for j, second in enumerate(nodes):
            if first[0][0] < second[0][0] or first[0][1] < second[0][1]:
                edges.append((i, j))
    return nodes, edges
def getintervals(filename, filestyle, epsilons, names, start_time, end_time):
    '''
    Compute the labeled extremal intervals of every selected time series at
    each noise level in epsilons.

    :param filename: Name of the time series data file. Include absolute or relative path to file.
    :param filestyle: "row" if the time points lie in a single row, or "col" if they lie in a column
    :param epsilons: list of floats between 0 and 1
    :param names: list of gene names on which to create intervals. Forwarded unchanged to getcurves;
                  this function defines no default (NOTE(review): "all" is presumably understood by
                  getcurves as "every gene in the file" - confirm there).
    :param start_time: front trim for time series in time units
    :param end_time: back trim for time series in time units
    :return: dict keyed by name; each value is a list of (eps, nodes) tuples, one per entry of
             epsilons and in the same order
    '''

    subset_curves = getcurves(filename, filestyle, names, start_time, end_time)
    intervals = dict()
    for name, curve in subset_curves.items():
        # The normalized curves and their merge trees do not depend on eps, so
        # they are built once per curve instead of once per (curve, eps) pair.
        n = curve.normalize()
        r = curve.normalize_reflect()
        merge_tree_mins = tmt.births_only(n)
        merge_tree_maxs = tmt.births_only(r)

        per_eps = []
        for eps in epsilons:
            time_ints_mins = ss.get_sublevel_sets(merge_tree_mins, n, eps)
            time_ints_maxs = ss.get_sublevel_sets(merge_tree_maxs, r, eps)
            labeled_mins = sorted((v, (name, "min"))
                                  for v in time_ints_mins.values())
            labeled_maxs = sorted((v, (name, "max"))
                                  for v in time_ints_maxs.values())
            # When eps is close to (b-a)/2 for max b and min a, then the intervals can be identical. Annihilate them.
            nodes = posets.annihilate(sorted(labeled_mins + labeled_maxs))
            per_eps.append((eps, nodes))
        intervals[name] = per_eps
    return intervals
def get_eps_sup(a, curve, eps):
    '''
    Look up the epsilon support interval of the point a when a is a local
    extremum of curve.

    A minimum is handled on curve.curve directly; a maximum is handled on the
    reflected curve. In both cases the births-only merge tree is built and
    passed to gssnr with eps_restriction=False.
    :param a: the x-coordinate of a point in curve
    :param curve: curve object
    :param eps: float threshold (noise level) For normalized curves, 0 < eps < 1.
    :return: the entry for a in the gssnr result if a is classified as 'min'
             or 'max', otherwise False
    '''
    kind = get_extremum_type(a, curve.curve)
    if kind == 'min':
        values = curve.curve
    elif kind == 'max':
        # Maxima are treated as minima of the reflected curve.
        values = Curve(curve.reflect()).curve
    else:
        return False

    merge_tree = births_only(values)
    supports = gssnr(merge_tree, values, eps, eps_restriction=False)
    return supports[a]
def get_min_lives(curve):
    '''
    Calculates the node life for every entry of the births-only merge tree of
    curve.

    For a tree entry u -> (s, v): when s == v the life is half the total range
    of the curve (max value minus min value); otherwise it is half of
    |curve[u] - curve[s]|.
    :param curve: dict with times keying function values (should be of the form curve.curve or curve.normalized)
    :return: dict keyed like the births-only merge tree, giving the node life of each entry
    '''
    return {
        node: (
            (max(curve.values()) - min(curve.values())) / 2
            if first == second
            else abs(curve[node] - curve[first]) / 2
        )
        for node, (first, second) in births_only(curve).items()
    }
def test():
    '''
    Check get_sublevel_sets against hand-computed intervals on small curves.

    Each curve gets one births-only merge tree, which is then queried at
    several eps values. The result of the last query is printed.
    '''
    def check(curve_obj, cases):
        # Build the tree once, then compare every (eps, expected) pair in order.
        tree = tmt.births_only(curve_obj.curve)
        found = None
        for eps, expected in cases:
            found = get_sublevel_sets(tree, curve_obj.curve, eps)
            assert (found == expected)
        return found

    # integer curve 1
    check(
        Curve({0: -2, 1: 2, 2: 0, 3: 3, 4: -4, 5: 1, 6: -7}),
        [
            (0.75, {0: (0, 1), 2: (1, 3), 4: (3, 5), 6: (5, 6)}),
            (2, {0: (0, 1), 4: (3, 5), 6: (5, 6)}),
            (3, {6: (5, 6)}),
        ],
    )

    # integer curve 2
    check(
        Curve({0: 0, 1: -1, 2: -2, 3: 1, 4: 3, 5: 6, 6: 2}),
        [
            (0.75, {2: (0, 3), 6: (5, 6)}),
            (1, {2: (0, 3), 6: (5, 6)}),
            (3, {2: (0, 5)}),
        ],
    )

    # curve 3 with 3 equal points
    plateau3 = Curve({
        0: 2,
        1: 0,
        2: 0,
        3: 0,
        4: 2,
        5: 3,
        6: 3,
        7: 3,
        8: 1.5,
        9: 0
    })
    check(
        plateau3,
        [
            (0.5, {1: (0, 4), 9: (8, 9)}),
            (1.1, {1: (0, 5), 9: (7, 9)}),
            (1.5, {1: (0, 5), 9: (7, 9)}),
            (1.6, {1: (0, 9)}),
        ],
    )

    # find maxima: same data, reflected
    check(
        Curve(plateau3.reflect()),
        [
            (0.5, {0: (0, 1), 5: (4, 8)}),
            (1.1, {5: (3, 9)}),
        ],
    )

    # curve 3 with 2 equal points
    check(
        Curve({0: 2, 1: 0, 2: 0, 3: 2, 4: 3, 5: 3, 6: 1.5, 7: 0}),
        [
            (0.5, {1: (0, 3), 7: (6, 7)}),
            (1.1, {1: (0, 4), 7: (5, 7)}),
            (1.5, {1: (0, 4), 7: (5, 7)}),
            (1.6, {1: (0, 7)}),
        ],
    )

    # constant curve
    time_ints = check(
        Curve({0: 2, 1: 2, 2: 2, 3: 2, 4: 2, 5: 2, 6: 2, 7: 2}),
        [
            (0.5, {0: (0, 7)}),
        ],
    )
    print(time_ints)