Beispiel #1
0
 def test_limit_to_span(self):
     # One event over 1..9 queried with span 3..7: the only period
     # expected back is 3..7 carrying the event's value.
     source = [esal.Event(esal.Interval(1, 9), 'a', 1)]
     wanted = [(esal.Interval(3, 7), 1)]
     self.assertEqual(wanted, list(events.periods(source, 3, 7)))
Beispiel #2
0
 def test_min_len_limit_to_span(self):
     # A point event at 1 with min_len=7 and span 2..5: the expected
     # result is a single period covering exactly the span.
     source = [esal.Event(esal.Interval(1, 1), 'a', 1)]
     wanted = [(esal.Interval(2, 5), 1)]
     result = list(events.periods(source, 2, 5, min_len=7))
     self.assertEqual(wanted, result)
Beispiel #3
0
 def test_backoff_too_much(self):
     # Two point events at 1 and 5 with min_len=5 and backoff=5: the
     # first period is expected to stay the point 1..1 and the second
     # to run to the end of the span.
     source = [
         esal.Event(esal.Interval(at, at), 'a', val)
         for at, val in [(1, 1), (5, 2)]
     ]
     wanted = [
         (esal.Interval(lo, hi), val)
         for lo, hi, val in [(1, 1, 1), (5, 9, 2)]
     ]
     result = list(events.periods(source, 1, 9, min_len=5, backoff=5))
     self.assertEqual(wanted, result)
Beispiel #4
0
 def test_backoff_drop_empty_zero_val(self):
     # With backoff=1 over span 0..4, no zero-valued period is
     # expected: only the two input periods come back unchanged.
     bounds = [(1, 2), (3, 4)]
     source = [esal.Event(esal.Interval(lo, hi), 'a', 1)
               for lo, hi in bounds]
     wanted = [(esal.Interval(lo, hi), 1) for lo, hi in bounds]
     result = list(events.periods(source, 0, 4, backoff=1))
     self.assertEqual(wanted, result)
Beispiel #5
0
 def test_fill_span(self):
     # A single event at 2..3 inside span 1..4: zero-valued periods
     # are expected on both sides of it.
     source = [esal.Event(esal.Interval(2, 3), 'a', 1)]
     wanted = [
         (esal.Interval(lo, hi), val)
         for lo, hi, val in [(1, 2, 0), (2, 3, 1), (3, 4, 0)]
     ]
     self.assertEqual(wanted, list(events.periods(source, 1, 4)))
Beispiel #6
0
 def test_backoff_non_overlapping(self):
     # Two point events at 1 and 5 with min_len=3 and backoff=2 over
     # span 1..8: the expected periods are 1..3 and 5..8.
     source = [
         esal.Event(esal.Interval(at, at), 'a', val)
         for at, val in [(1, 1), (5, 2)]
     ]
     wanted = [
         (esal.Interval(lo, hi), val)
         for lo, hi, val in [(1, 3, 1), (5, 8, 2)]
     ]
     result = list(events.periods(source, 1, 8, min_len=3, backoff=2))
     self.assertEqual(wanted, result)
Beispiel #7
0
 def test_empty(self):
     # No events at all: one zero-valued period spanning 0..9 is
     # expected.
     wanted = [(esal.Interval(0, 9), 0)]
     result = list(events.periods([], 0, 9))
     self.assertEqual(wanted, result)
Beispiel #8
0
 def test_zero_values(self):
     # The single event carries the string value '0'
     source = [esal.Event(esal.Interval(1, 7), 'a', '0')]
     # Only the integer 0 counts as zero: the event is expected to be
     # kept and surrounded by zero-valued periods
     kept = [
         (esal.Interval(lo, hi), val)
         for lo, hi, val in [(0, 1, 0), (1, 7, '0'), (7, 9, 0)]
     ]
     result = list(events.periods(source, 0, 9, zero_values=(0, )))
     self.assertEqual(kept, result)
     # The string '0' counts as zero: the event is expected to vanish,
     # leaving one zero-valued period over the whole span
     dropped = [(esal.Interval(0, 9), 0)]
     result = list(events.periods(source, 0, 9, zero_values=('0', )))
     self.assertEqual(dropped, result)
 def setUp(self):
     # Build the shared fixture `self.ev_seq`: an event sequence with
     # 28 events, 6 facts, and a fixed ID.
     mk_event = esal.Event
     two_dates = esal.Interval('2019-04-09', '2019-04-10')
     one_date = esal.Interval('2019-04-09')
     no_value = (None, None)
     # 'mx' events: (type, first value component), all on `two_dates`
     mx_events = [
         mk_event(two_dates, ('mx', typ), (flag, None))
         for typ, flag in [
             ('4', 'lo'), ('4', 'hi'), ('5', 'lo'), ('8', 'hi'),
             ('0', 'lo'), ('2', 'ok'), ('1', 'ok'), ('8', 'ok'),
         ]
     ]
     # 'dx' events: the only ones built on `one_date`
     dx_events = [
         mk_event(one_date, ('dx', typ), no_value)
         for typ in ('2', '0', '7', '2', '1', '3')
     ]
     # Remaining categories, in order, all on `two_dates` and without
     # a value
     other_events = [
         mk_event(two_dates, (cat, typ), no_value)
         for cat, typs in [
             ('rx', ('0', '5', '0', '4', '1')),
             ('px', ('3', '2', '5', '2')),
             ('ox', ('0', '6')),
             ('vx', ('1', '1')),
             ('xx', (None, )),
         ]
         for typ in typs
     ]
     fact_list = [
         (('bx', 'dob'), '1949-04-09'),
         (('bx', 'ethn'), '1'),
         (('bx', 'gndr'), 'M'),
         (('bx', 'race'), '2'),
         (('hx', 'cancer-father'), 'yes'),
         (('hx', 'cancer-mother'), 'no'),
     ]
     self.ev_seq = esal.EventSequence(
         events=mx_events + dx_events + other_events,
         facts=fact_list,
         id=808186755,
     )
Beispiel #10
0
def sequence(
        event_records,
        event_sequence_id=None,
        header_nm2idx=header_nm2idx,
):
    """
    Build and return an event sequence from the given records.

    A record whose `lo` and `hi` fields are both `None` becomes a fact;
    every other record becomes an event.

    event_records:
        Iterable of event records, each an indexable collection of
        values.
    event_sequence_id:
        ID for the constructed event sequence.  When `None`, the ID
        field of the first record is used.
    header_nm2idx:
        Mapping of event record field names to their indices in the
        record.  Must include at least the following names: id, lo, hi,
        cat, typ, val, jsn.
    """
    # Look up every field position up front, in a fixed order
    (id_col, lo_col, hi_col, cat_col, typ_col, val_col, jsn_col) = (
        header_nm2idx[name]
        for name in ('id', 'lo', 'hi', 'cat', 'typ', 'val', 'jsn')
    )
    fact_list = []
    event_list = []
    for rec in event_records:
        # Adopt the ID of the first record if none was given
        if event_sequence_id is None:
            event_sequence_id = rec[id_col]
        lo, hi = rec[lo_col], rec[hi_col]
        key = (rec[cat_col], rec[typ_col])
        # No times at all means the record is a fact
        if lo is None and hi is None:
            fact_list.append((key, rec[val_col]))
            continue
        # Anything else is an event
        event_list.append(esal.Event(
            esal.Interval(lo, hi),
            key,
            (rec[val_col], rec[jsn_col]),
        ))
    return esal.EventSequence(event_list, fact_list, event_sequence_id)
Beispiel #11
0
 def test_backoff_zero_val(self):
     source = [
         esal.Event(esal.Interval(lo, hi), 'a', 1)
         for lo, hi in [(2, 3), (5, 6), (9, 10)]
     ]
     # Span 0..11 with backoff=1: zero-valued periods are expected
     # only at 0..1 and 7..8
     wanted = [
         (esal.Interval(lo, hi), val)
         for lo, hi, val in [
             (0, 1, 0), (2, 3, 1), (5, 6, 1), (7, 8, 0), (9, 10, 1),
         ]
     ]
     result = list(events.periods(source, 0, 11, backoff=1))
     self.assertEqual(wanted, result)
     # Span 0..12: one more zero-valued period is expected at the end
     wanted_longer = wanted + [(esal.Interval(11, 12), 0)]
     result = list(events.periods(source, 0, 12, backoff=1))
     self.assertEqual(wanted_longer, result)
Beispiel #12
0
 def test_empty_span(self):
     # Four events with distinct values that all touch the point 5,
     # queried with the empty span 5..5: each is expected back as the
     # point period 5..5 with its own value.
     source = [
         esal.Event(esal.Interval(lo, hi), 'a', val)
         for lo, hi, val in [(3, 5, 1), (4, 6, 2), (5, 5, 3), (5, 7, 4)]
     ]
     wanted = [(esal.Interval(5, 5), val) for val in (1, 2, 3, 4)]
     self.assertEqual(wanted, list(events.periods(source, 5, 5)))
Beispiel #13
0
 def test_min_len(self):
     # With min_len=2 the events at 1..1 and 4..5 are expected to be
     # stretched to length 2; 7..9 is already long enough.  Gaps are
     # expected to be filled with zero-valued periods.
     source = [
         esal.Event(esal.Interval(lo, hi), 'a', 1)
         for lo, hi in [(1, 1), (4, 5), (7, 9)]
     ]
     wanted = [
         (esal.Interval(lo, hi), val)
         for lo, hi, val in [
             (1, 3, 1), (3, 4, 0), (4, 6, 1), (6, 7, 0), (7, 9, 1),
         ]
     ]
     result = list(events.periods(source, 1, 9, min_len=2))
     self.assertEqual(wanted, result)
Beispiel #14
0
 def test_min_len_drop_empty_zero_val(self):
     # Zero-valued events alternate with point events at 2 and 4.
     # With min_len=2 over span 2..6, only the two stretched nonzero
     # periods are expected back.
     source = [
         esal.Event(esal.Interval(lo, hi), 'a', val)
         for lo, hi, val in [
             (0, 2, 0), (2, 2, 1), (2, 4, 0), (4, 4, 2), (4, 9, 0),
         ]
     ]
     wanted = [
         (esal.Interval(lo, hi), val)
         for lo, hi, val in [(2, 4, 1), (4, 6, 2)]
     ]
     result = list(events.periods(source, 2, 6, min_len=2))
     self.assertEqual(wanted, result)
Beispiel #15
0
 def test_same_points(self):
     # Four events at the same point, two per value: one period per
     # distinct value is expected.
     source = [
         esal.Event(esal.Interval(1, 1), 'a', val)
         for val in (1, 1, 2, 2)
     ]
     wanted = [(esal.Interval(1, 1), val) for val in (1, 2)]
     self.assertEqual(wanted, list(events.periods(source, 1, 1)))
Beispiel #16
0
 def test_merge(self):
     # The two adjacent events with value 1 are expected to merge and
     # then end at 6, where the event with value 2 starts.
     source = [
         esal.Event(esal.Interval(lo, hi), 'a', val)
         for lo, hi, val in [(1, 4, 1), (4, 8, 1), (6, 9, 2)]
     ]
     wanted = [
         (esal.Interval(lo, hi), val)
         for lo, hi, val in [(0, 1, 0), (1, 6, 1), (6, 9, 2)]
     ]
     self.assertEqual(wanted, list(events.periods(source, 0, 9)))
Beispiel #17
0
 def test_overlapping_intervals(self):
     # A chain of overlapping events with values 1, 2, 1, 0, 2, 1 over
     # span 0..9.  The expected result is a sequence of abutting
     # periods with zero-valued periods at both ends.
     source = [
         esal.Event(esal.Interval(lo, hi), 'a', val)
         for lo, hi, val in [
             (1, 3, 1), (2, 4, 2), (3, 5, 1),
             (4, 6, 0), (5, 7, 2), (6, 8, 1),
         ]
     ]
     wanted = [
         (esal.Interval(lo, hi), val)
         for lo, hi, val in [
             (0, 1, 0), (1, 2, 1), (2, 3, 2), (3, 5, 1),
             (5, 6, 2), (6, 8, 1), (8, 9, 0),
         ]
     ]
     self.assertEqual(wanted, list(events.periods(source, 0, 9)))
Beispiel #18
0
def periods(
        events,
        span_lo=None,
        span_hi=None,
        value=None,
        zero_values=(0, None),
        min_len=0,
        backoff=0,
        output_zero=0,
):
    """
    Yield disjoint intervals corresponding to different values of the
    given events.

    Converts a sequence of events that approximately represent a signal
    into a guess at the underlying piecewise constant signal (a sequence
    of intervals that partitions a span of time, where each interval has
    a value).  Assumes the given events are sorted by their start times.
    The conversion gives each interval a minimum length, unions
    intervals with the same value and then puts them in sequence by
    truncating an interval at the start of the next interval with a
    different, nonzero value (with optional back-off).  Finally, fills
    in gaps with zero values.  This is intended to be useful for
    constructing event "eras" where the values of an event are mutually
    exclusive (e.g. different dosages of a medication).

    For example, in the following, the top collection of intervals would
    be converted into the bottom sequence of intervals given min_len=6
    and backoff=2.

    --------------------------------------------------
                      222  22
       11111  111 11111                  11 11111
    00000                 00000 000000
    --------------------------------------------------
    00 111111 1111111 22222222222 000000 111111111 000
    --------------------------------------------------

    Yields `(esal.Interval, value)` pairs in order.

    events:
        Iterable of events.
    span_lo:
        Start (if any) of span to which events are clipped.  When
        `None`, the span is unbounded below and no zero period is
        yielded before the first nonzero period.
    span_hi:
        End (if any) of span to which events are clipped.  When `None`,
        the span is unbounded above and no zero period is yielded after
        the last nonzero period.
    value:
        Function to extract values from events: value(event) -> object.
        Default uses `esal.Event.value`.
    zero_values:
        Set of values to treat as zero (non-signal) and ignore.
    min_len:
        Minimum length of each interval (prior to any truncation).
    backoff:
        Size of gap between intervals.  A larger gap increases the
        chances that an underlying transition from one value to the next
        happened in the gap.
        [TODO technically also need a starting lag / offset]
    output_zero:
        Value to use when filling in between nonzero values.
    """
    # Lengthen and clip nonzero periods
    clipped = []
    for ev in events:
        # Ensure a minimum length before clipping
        lo = ev.when.lo
        hi = max(ev.when.hi, lo + min_len)
        val = value(ev) if value is not None else ev.value
        # Discard any events that are "non-events" (have zero value) or
        # that are outside the allowed span
        if (val in zero_values or
            (span_lo is not None and hi < span_lo) or
            (span_hi is not None and lo > span_hi)):
            continue
        # Clip to allowed span
        if span_hi is not None:
            hi = min(hi, span_hi)
        if span_lo is not None:
            lo = max(lo, span_lo)
        clipped.append((lo, hi, val))
    # Merge and sequentialize periods in a single pass.  Each incoming
    # period is compared only with the most recently kept period.
    prds = []
    for lo2, hi2, val2 in clipped:
        if prds:
            lo1, hi1, val1 = prds[-1]
            # Merge periods with the same value.  Take the later of the
            # two ends so that a period contained in the previous one
            # does not shorten the union.
            if hi1 >= lo2 and val1 == val2:
                prds[-1] = (lo1, max(hi1, hi2), val1)
                continue
            # Put periods in sequence by removing overlaps
            if hi1 > lo2:
                prds[-1] = (lo1, lo2, val1)
        prds.append((lo2, hi2, val2))
    # Yield periods with intervening zero periods as needed.  Separate
    # periods by backing off from the following nonzero event (if there
    # is one).  `zero_lo` is `None` only while the span is unbounded
    # below and no period has been yielded yet.
    zero_lo = span_lo
    n_prds = len(prds)
    for idx, (lo, hi, val) in enumerate(prds):
        # Yield a preceding zero period if it would be non-empty after
        # backing off from the current event
        zero_hi = lo - backoff
        if zero_lo is not None and zero_lo < zero_hi:
            yield (esal.Interval(zero_lo, zero_hi), output_zero)
        # Back off from the following nonzero event if there is one,
        # but never to before the start of this period
        if idx + 1 < n_prds:
            hi_bk = max(min(hi, prds[idx + 1][0] - backoff), lo)
        else:
            hi_bk = hi
        yield (esal.Interval(lo, hi_bk), val)
        # Increment.  Delay the zero period by the backoff amount.
        zero_lo = hi_bk + backoff
    # Trailing zero period, only when both of its ends are known
    if (zero_lo is not None and span_hi is not None
            and zero_lo < span_hi):
        yield (esal.Interval(zero_lo, span_hi), output_zero)
Beispiel #19
0
def mk_feature_vectors(
    events_csv_filename,
    examples_csv_filename,
    features_csv_filename,
    events_csv_format=events.csv_format,
    examples_csv_format=examples.csv_format,
    features_csv_format=csv_format,
    events_header=events.header(),
    examples_header=examples.header(),
    features_header=header(),
    events_header_detector=True,
    examples_header_detector=True,
    features_header_detector=True,
    include_event_record=None,
    transform_event_record=None,
    include_example_record=None,
    always_feature_keys=(),
    feature_function_namespaces=None,
    feature_function_modules=None,
):
    """
    Make and yield feature vectors.

    Reads the example definitions and the feature definitions, then
    reads event sequences and, for every example whose ID matches a
    sequence, builds a feature vector from the events overlapping the
    example's interval.

    Yields (example-record, feature-vector) pairs.
    """
    # Position of the ID field in event records
    ev_fields = dict(
        (fld[0], pos) for pos, fld in enumerate(events_header))
    ev_id_pos = ev_fields['id']
    # Positions of the ID and interval fields in example records
    ex_fields = dict(
        (fld[0], pos) for pos, fld in enumerate(examples_header))
    ex_id_pos = ex_fields['id']
    ex_lo_pos = ex_fields['lo']
    ex_hi_pos = ex_fields['hi']
    # Read the example definitions and group them by ID
    examples_by_id = collections.defaultdict(list)
    for example in records.read_csv(
            examples_csv_filename,
            examples_csv_format,
            examples_header,
            examples_header_detector,
            include_record=include_example_record,
    ):
        examples_by_id[example[ex_id_pos]].append(example)
    # Read the feature definitions; only the third result is used
    _, _, key2idsfuncs = load(
        features_csv_filename,
        features_csv_format,
        features_header,
        features_header_detector,
        feature_function_namespaces,
        feature_function_modules,
    )
    # Read event records unparsed; parsing is handed to
    # `events.read_sequences` through `parse_id` and `parse_record`
    raw_event_records = records.read_csv(
        events_csv_filename,
        events_csv_format,
        events_header,
        header_detector=events_header_detector,
        parser=False,
    )
    # Only IDs that have examples are requested
    sequences = events.read_sequences(
        raw_event_records,
        header=events_header,
        parse_id=events_header[ev_id_pos][1],
        include_ids=examples_by_id,
        parse_record=records.mk_parser(events_header),
        include_record=include_event_record,
        transform_record=transform_event_record,
    )
    for ev_seq in sequences:
        # A sequence whose ID has no examples yields nothing
        for example in examples_by_id.get(ev_seq.id, ()):
            # Restrict the sequence to the events that overlap the
            # example's interval
            window = esal.Interval(example[ex_lo_pos], example[ex_hi_pos])
            overlapping = ev_seq.events_overlapping(
                window.lo, window.hi, window.is_lo_open,
                window.is_hi_open)
            subseq = ev_seq.subsequence(overlapping)
            # Yield the example together with its feature vector
            yield example, vector(
                key2idsfuncs, example, subseq, always_feature_keys)