Python LabelMaker.search 예제들, composeml.LabelMaker.search Python 예제들

예제 #1

0

파일 보기

def test_search_invalid_n_examples(transactions, total_spent_fn):
    """Asking for several examples per instance without a gap must fail.

    Both ``slice`` and ``search`` are expected to raise an ``AssertionError``
    whose message matches 'must specify gap'.
    """
    label_maker = LabelMaker(
        target_entity='customer_id',
        time_index='time',
        labeling_function=total_spent_fn,
    )
    request = {'num_examples_per_instance': 2}
    error_pattern = 'must specify gap'

    # next() pulls the first slice in case slicing is evaluated lazily.
    with pytest.raises(AssertionError, match=error_pattern):
        slices = label_maker.slice(transactions, **request)
        next(slices)

    with pytest.raises(AssertionError, match=error_pattern):
        label_maker.search(transactions, **request)

예제 #2

0

파일 보기

def test_search_offset_mix_3(transactions, total_spent_fn):
    """
    Offset mix: window_size is absolute ('8h'), minimum_data is absolute
    (a timestamp string), and gap is relative (the integer 1).
    """
    label_maker = LabelMaker(
        target_entity='customer_id',
        time_index='time',
        labeling_function=total_spent_fn,
        window_size='8h',
    )

    search_options = {
        'num_examples_per_instance': -1,
        'minimum_data': '2019-01-01 08:00:00',
        'gap': 1,
    }
    label_times = label_maker.search(transactions, **search_options)
    actual = to_csv(label_times, index=False)

    header = 'customer_id,time,total_spent'
    rows = [
        '0,2019-01-01 08:00:00,2',
        '0,2019-01-01 08:30:00,1',
        '1,2019-01-01 09:00:00,3',
        '1,2019-01-01 09:30:00,2',
        '1,2019-01-01 10:00:00,1',
        '2,2019-01-01 10:30:00,4',
        '2,2019-01-01 11:00:00,3',
        '2,2019-01-01 11:30:00,2',
        '2,2019-01-01 12:00:00,1',
        '3,2019-01-01 12:30:00,1',
    ]
    expected = [header] + rows

    assert actual == expected

예제 #3

0

파일 보기

def test_search_with_undefined_labels(transactions, total_spent_fn):
    """Search with the number of examples per instance given as a dict.

    The labeling function is the total spent modulo 3. The dict only has
    entries for 1 and 2 (presumably keyed by label value -- confirm against
    the LabelMaker API), requesting one example for each.
    """

    # The inner name is kept as ``total_spent``: the expected CSV header
    # below uses it as the label column name.
    def total_spent(ds):
        remainder = total_spent_fn(ds) % 3
        return remainder

    label_maker = LabelMaker(
        target_entity='customer_id',
        time_index='time',
        labeling_function=total_spent,
    )

    examples_per_label = {1: 1, 2: 1}
    label_times = label_maker.search(
        transactions,
        num_examples_per_instance=examples_per_label,
        gap=1,
    )
    actual = to_csv(label_times, index=False)

    expected = [
        'customer_id,time,total_spent',
        '0,2019-01-01 08:00:00,2',
        '0,2019-01-01 08:30:00,1',
        '1,2019-01-01 09:30:00,2',
        '1,2019-01-01 10:00:00,1',
        '2,2019-01-01 10:30:00,1',
        '2,2019-01-01 11:30:00,2',
        '3,2019-01-01 12:30:00,1',
    ]

    assert actual == expected

예제 #4

0

파일 보기

def test_label_type(transactions, total_spent_fn):
    """The 'total_spent' target is 'continuous' as searched and 'discrete' after bin(2)."""
    label_maker = LabelMaker(
        target_entity='customer_id',
        time_index='time',
        labeling_function=total_spent_fn,
    )
    label_times = label_maker.search(transactions, num_examples_per_instance=1)
    target = 'total_spent'

    assert label_times.target_types[target] == 'continuous'

    binned = label_times.bin(2)
    assert binned.target_types[target] == 'discrete'

예제 #5

0

파일 보기

def test_search_offset_negative_1(transactions, total_spent_fn):
    """Negative offset strings for minimum_data and gap raise an AssertionError."""
    label_maker = LabelMaker(
        target_entity='customer_id',
        time_index='time',
        labeling_function=lambda: None,
        window_size=2,
    )

    negative_offsets = {'minimum_data': '-1h', 'gap': '-1h'}

    with pytest.raises(AssertionError, match='offset must be positive'):
        label_maker.search(
            transactions,
            num_examples_per_instance=2,
            **negative_offsets,
        )

예제 #6

0

파일 보기

def test_column_based_windows(transactions, total_spent_fn):
    """Search using a column name ('session_id') as the window_size."""
    sessions = [1, 2, 3, 3, 4, 5, 5, 5, 6, 7]
    with_sessions = transactions.assign(session_id=sessions)

    label_maker = LabelMaker(
        target_entity='customer_id',
        time_index='time',
        window_size='session_id',
        labeling_function=total_spent_fn,
    )

    label_times = label_maker.search(with_sessions, -1)
    actual = to_csv(label_times, index=False)

    header = 'customer_id,time,total_spent'
    rows = [
        '0,2019-01-01 08:00:00,1',
        '0,2019-01-01 08:30:00,1',
        '1,2019-01-01 09:00:00,2',
        '1,2019-01-01 10:00:00,1',
        '2,2019-01-01 10:30:00,3',
        '2,2019-01-01 12:00:00,1',
        '3,2019-01-01 12:30:00,1',
    ]
    expected = [header] + rows

    assert actual == expected

예제 #7

0

파일 보기

def test_invalid_threshold(transactions, total_spent_fn):
    """A blank-string minimum_data raises a ValueError matching 'invalid threshold'."""
    label_maker = LabelMaker(
        target_entity='customer_id',
        time_index='time',
        labeling_function=lambda: None,
        window_size=2,
    )

    search_options = {'num_examples_per_instance': 2, 'minimum_data': ' '}

    with pytest.raises(ValueError, match='invalid threshold'):
        label_maker.search(transactions, **search_options)

예제 #8

0

파일 보기

파일: test_label_maker.py 프로젝트: anhuaxiang/compose

def test_search_offset_mix_7(transactions, total_spent_fn):
    """
    Search with a relative window_size (the integer 10) and an unbounded
    number of examples per instance.

    NOTE(review): the previous description also listed relative minimum_data
    and gap values, but neither argument is passed to ``search`` here --
    confirm whether that is intentional.
    """
    label_maker = LabelMaker(
        target_entity='customer_id',
        time_index='time',
        labeling_function=total_spent_fn,
        window_size=10,
    )

    unlimited = float('inf')
    label_times = label_maker.search(
        transactions,
        num_examples_per_instance=unlimited,
        verbose=False,
    )
    actual = to_csv(label_times, index=False)

    expected = ['customer_id,cutoff_time,total_spent']
    expected += [
        '0,2019-01-01 08:00:00,2',
        '1,2019-01-01 09:00:00,3',
        '2,2019-01-01 10:30:00,4',
        '3,2019-01-01 12:30:00,1',
    ]

    assert actual == expected

예제 #9

0

파일 보기

파일: test_label_maker.py 프로젝트: anhuaxiang/compose

def test_search_offset_mix_2(transactions, total_spent_fn):
    """
    Offset mix: window_size is absolute ('30min') and minimum_data is
    relative (the integer 2).

    NOTE(review): the previous description also listed an absolute gap, but
    no gap argument is passed to ``search`` here -- confirm whether that is
    intentional.
    """
    label_maker = LabelMaker(
        target_entity='customer_id',
        time_index='time',
        labeling_function=total_spent_fn,
        window_size='30min',
    )

    search_options = {
        'num_examples_per_instance': 2,
        'minimum_data': 2,
        'verbose': False,
    }
    label_times = label_maker.search(transactions, **search_options)
    actual = to_csv(label_times, index=False)

    expected = ['customer_id,cutoff_time,total_spent']
    expected += [
        '1,2019-01-01 10:00:00,1',
        '2,2019-01-01 11:30:00,1',
        '2,2019-01-01 12:00:00,1',
    ]

    assert actual == expected

예제 #10

0

파일 보기

def test_search_with_invalid_index(transactions, total_spent_fn):
    """Search rejects a shuffled frame and a frame whose time column is all NaT."""
    label_maker = LabelMaker(
        target_entity='customer_id',
        time_index='time',
        labeling_function=lambda df: None,
        window_size=2,
    )

    # Case 1: rows drawn in random order.
    shuffled = transactions.sample(n=10, random_state=0)
    unsorted_error = "data frame must be sorted chronologically"
    with pytest.raises(AssertionError, match=unsorted_error):
        label_maker.search(shuffled, num_examples_per_instance=2)

    # Case 2: every value in the time column replaced by NaT.
    null_times = transactions.assign(time=pd.NaT)
    null_error = "index contains null values"
    with pytest.raises(AssertionError, match=null_error):
        label_maker.search(null_times, num_examples_per_instance=2)

예제 #11

0

파일 보기

파일: test_label_maker.py 프로젝트: anhuaxiang/compose

def test_label_type(transactions, total_spent_fn):
    """A label_type passed to search is reported back by the result's ``label_type``."""
    label_maker = LabelMaker(
        target_entity='customer_id',
        time_index='time',
        labeling_function=total_spent_fn,
    )
    search_options = {
        'num_examples_per_instance': 1,
        'label_type': 'discrete',
        'verbose': False,
    }
    label_times = label_maker.search(transactions, **search_options)

    assert label_times.label_type == 'discrete'

예제 #12

0

파일 보기

파일: test_label_maker.py 프로젝트: anhuaxiang/compose

def test_search_offset_negative_0(transactions, total_spent_fn):
    """Negative integer values for minimum_data and gap raise an AssertionError."""
    label_maker = LabelMaker(
        target_entity='customer_id',
        time_index='time',
        labeling_function=lambda: None,
        window_size=2,
    )

    negative_offsets = {'minimum_data': -1, 'gap': -1}

    with pytest.raises(AssertionError, match='must be greater than zero'):
        label_maker.search(
            transactions,
            num_examples_per_instance=2,
            verbose=False,
            **negative_offsets,
        )

예제 #13

0

파일 보기

def test_search_default(transactions, total_spent_fn):
    """Search for one example per instance with no window_size, minimum_data or gap given."""
    label_maker = LabelMaker(
        target_entity='customer_id',
        time_index='time',
        labeling_function=total_spent_fn,
    )

    label_times = label_maker.search(transactions, num_examples_per_instance=1)
    actual = to_csv(label_times, index=False)

    header = 'customer_id,time,total_spent'
    rows = [
        '0,2019-01-01 08:00:00,2',
        '1,2019-01-01 09:00:00,3',
        '2,2019-01-01 10:30:00,4',
        '3,2019-01-01 12:30:00,1',
    ]
    expected = [header] + rows

    assert actual == expected

예제 #14

0

파일 보기

def test_search_on_empty_labels(transactions):
    """A labeling function that always returns None yields an empty search result."""
    label_maker = LabelMaker(
        target_entity='customer_id',
        time_index='time',
        labeling_function=lambda ds: None,
        window_size=2,
    )

    search_options = {
        'minimum_data': 1,
        'num_examples_per_instance': 2,
        'gap': 1,
    }
    label_times = label_maker.search(transactions, **search_options)

    assert label_times.empty

예제 #15

0

파일 보기

def test_search_on_empty_data_slices(transactions, total_spent_fn):
    """Searching a frame whose time column is entirely NaT yields an empty result."""
    label_maker = LabelMaker(
        target_entity='customer_id',
        time_index='time',
        labeling_function=lambda df: None,
        window_size=2,
    )

    # Replace every timestamp with NaT before searching.
    null_time_df = transactions.assign(time=pd.NaT)

    search_options = {
        'minimum_data': 1,
        'num_examples_per_instance': 2,
        'gap': 3,
    }
    label_times = label_maker.search(null_time_df, **search_options)

    assert label_times.empty

예제 #16

0

파일 보기

def test_search_with_multiple_targets(
        transactions,
        total_spent_fn,
        unique_amounts_fn,
):
    """Search with a dict of labeling functions, then select a single target.

    Checks the CSV output with both target columns, and the CSV output after
    selecting only 'unique_amounts'.
    """
    labeling_functions = {
        'total_spent': total_spent_fn,
        'unique_amounts': unique_amounts_fn,
    }
    label_maker = LabelMaker(
        target_entity='customer_id',
        time_index='time',
        window_size=2,
        labeling_function=labeling_functions,
    )

    label_times = label_maker.search(transactions, num_examples_per_instance=-1)

    # Both targets side by side.
    all_targets = to_csv(label_times, index=False)
    expected_all = [
        'customer_id,time,total_spent,unique_amounts',
        '0,2019-01-01 08:00:00,2,1',
        '1,2019-01-01 09:00:00,2,1',
        '1,2019-01-01 10:00:00,1,1',
        '2,2019-01-01 10:30:00,2,1',
        '2,2019-01-01 11:30:00,2,1',
        '3,2019-01-01 12:30:00,1,1',
    ]
    assert all_targets == expected_all, 'unexpected calculated values'

    # Only the selected target.
    selected = label_times.select('unique_amounts')
    single_target = to_csv(selected, index=False)
    expected_selected = [
        'customer_id,time,unique_amounts',
        '0,2019-01-01 08:00:00,1',
        '1,2019-01-01 09:00:00,1',
        '1,2019-01-01 10:00:00,1',
        '2,2019-01-01 10:30:00,1',
        '2,2019-01-01 11:30:00,1',
        '3,2019-01-01 12:30:00,1',
    ]
    assert single_target == expected_selected, 'selected values differ from calculated values'

예제 #17

0

파일 보기

def labels():
    """Build thresholded label times from the Featuretools mock customer data.

    Loads the single-table mock customer dataset, keeps the transaction time,
    customer id and amount columns sorted by transaction time, searches for
    labels with a one-hour window, and applies a threshold of 1250.

    Returns:
        The result of ``threshold(1250)`` on the label times found by
        ``LabelMaker.search``.
    """
    df = ft.demo.load_mock_customer(return_single_table=True, random_seed=0)

    # Sort into a new frame rather than in place: the column subset is derived
    # from another frame, and mutating it in place can trigger pandas'
    # SettingWithCopyWarning.
    columns = ['transaction_time', 'customer_id', 'amount']
    df = df[columns].sort_values('transaction_time')

    lm = LabelMaker(
        target_entity='customer_id',
        time_index='transaction_time',
        labeling_function=total_spent,
        window_size='1h',
    )

    lt = lm.search(
        df,
        minimum_data='10min',
        num_examples_per_instance=2,
        gap='30min',
        drop_empty=True,
        verbose=False,
    )

    return lt.threshold(1250)