Esempio n. 1
0
def days_to_first_event(df, groupby, time_col):
    """Per-group days elapsed since the group's first date in a time series.

    `df[time_col]` is cast to ``datetime64[ns]`` and handed, together with
    the `df[groupby]` values, to ``wnp.group_apply`` with
    ``_time_to_min_date`` as the per-group function. The grouped output is
    then passed through ``_convert_ns_to_days`` and returned.

    NOTE(review): the exact semantics of ``_time_to_min_date`` and
    ``_convert_ns_to_days`` are defined elsewhere; the summary above follows
    their names and the original description -- confirm against them.
    """
    timestamps = df[time_col].astype('datetime64[ns]')
    group_keys = df[groupby].values
    elapsed = wnp.group_apply(timestamps, group_keys, _time_to_min_date)
    return _convert_ns_to_days(elapsed)
Esempio n. 2
0
def grouped_lagged_decay(df, groupby, col, fillna=0, decay=1):
    """Apply ``lagged_decay`` to `df[col]` within each `groupby` group.

    Args:
        df: frame providing the `col` and `groupby` columns.
        groupby: column selector whose values define the groups.
        col: column whose values are decayed.
        fillna: value passed to ``wnp.fillna`` for the grouped output.
        decay: forwarded to ``lagged_decay`` as its ``decay`` keyword.

    Returns:
        Whatever ``wnp.fillna`` returns for the grouped output.
    """
    # The input is always filled with a literal 0; `fillna` only applies to
    # the output below.
    clean_values = wnp.fillna(df[col].values, 0)
    decay_func = partial(lagged_decay, decay=decay)
    decayed = wnp.group_apply(clean_values, df[groupby].values, decay_func)
    return wnp.fillna(decayed, fillna)
Esempio n. 3
0
def test_vector_group_apply_works_with_2dims():
    """group_apply accepts a 2-D ids array; each row is expected to act as one key."""
    ids = np.array([[1, 0], [1, 0], [1, 0], [1, 1], [2, 2], [2, 2]])
    values = np.array([1, 1, 1, 2, 2, 2])
    # Expected: the np.sum of each group, repeated for every member of it.
    expected = np.array([3, 3, 3, 2, 4, 4])

    output = utils_np.group_apply(values, ids, np.sum)

    assert np.all(output == expected)
Esempio n. 4
0
def grouped_days_since_result(df,
                              groupby,
                              col='win_flag',
                              value=1,
                              fillna=-1,
                              coldate='scheduled_time'):
    """Apply ``days_since_result`` to `df[[col, coldate]]` within each group.

    Args:
        df: frame providing the `col`, `coldate` and `groupby` columns.
        groupby: column selector whose values define the groups.
        col: column holding the result flag.
        value: forwarded to ``days_since_result`` as its ``value`` keyword.
        fillna: value passed to ``wnp.fillna`` for the grouped output.
        coldate: column holding the event date.

    Returns:
        Whatever ``wnp.fillna`` returns for the grouped output.
    """
    # Forward the caller's `value`; it used to be hard-coded to 1, which
    # silently ignored the parameter.
    func = partial(days_since_result, value=value)
    result = wnp.group_apply(df[[col, coldate]].values,
                             df[groupby].values,
                             func,
                             multiarg=True)
    return wnp.fillna(result, fillna)
Esempio n. 5
0
def test_vector_group_apply():
    """Grouped np.cumsum gives the same output for integer and string ids."""
    id_variants = (
        [1, 2, 1, 2, 1, 2],
        ["A", "B", "A", "B", "A", "B"],
    )
    for raw_ids in id_variants:
        # Fresh arrays per case so no state is shared between iterations.
        values = np.array([1, 0, 0, 1, 1, 0])
        ids = np.array(raw_ids)
        expected = np.array([1, 0, 1, 1, 2, 1])

        output = utils_np.group_apply(values, ids, np.cumsum)

        assert np.all(output == expected)
Esempio n. 6
0
def test_vector_group_apply_works_with_tuple_ids():
    """group_apply handles ids built from zipped (first, second) key pairs."""
    values = np.array([1, 1, 1, 2, 2, 2])
    expected = np.array([3, 3, 3, 2, 4, 4])

    # Each entry is a pair of key columns: int/int, int/str and str/str.
    key_columns = [
        (
            np.array([1, 1, 1, 2, 2, 2]),
            np.array([0, 0, 0, 1, 2, 2]),
        ),
        (
            np.array([1, 1, 1, 2, 2, 2]),
            np.array(["A", "A", "A", "B", "C", "C"]),
        ),
        (
            np.array(["a", "a", "a", "b", "b", "b"]),
            np.array(["A", "A", "A", "B", "C", "C"]),
        ),
    ]
    all_ids = [np.array(list(zip(first, second)))
               for first, second in key_columns]

    for ids in all_ids:
        output = utils_np.group_apply(values, ids, np.sum)
        assert np.all(output == expected)
Esempio n. 7
0
def grouped_ema(df, col, alpha, groupby):
    """Apply ``ema`` (with the given `alpha`) to `df[col]` within each group.

    Groups are defined by the values of `df[groupby]`; the grouped output of
    ``wnp.group_apply`` is returned unchanged.
    """
    series_values = df[col].values
    smoother = partial(ema, alpha=alpha)
    return wnp.group_apply(series_values, df[groupby].values, smoother)