def test_accepts_cutoff_time_compose(dataframes, relationships):
    """Check that ``dfs`` accepts label times from ``cp.LabelMaker`` as cutoff times.

    Labels are searched over the ``"transactions"`` dataframe, their ``"time"``
    column is converted to numeric, and ``"card_id"`` is renamed to ``"id"``
    before they are passed to ``dfs`` as ``cutoff_time`` for the ``"cards"``
    target dataframe.
    """

    def fraud_occured(df):
        # A window is labelled positive when any of its rows has a truthy "fraud".
        return df["fraud"].any()

    maker_options = {
        "target_dataframe_name": "card_id",
        "time_index": "transaction_time",
        "labeling_function": fraud_occured,
        "window_size": 1,
    }
    label_maker = cp.LabelMaker(**maker_options)

    # The first element of the "transactions" entry is the dataframe itself.
    transactions = to_pandas(dataframes["transactions"][0])
    cutoff_labels = label_maker.search(transactions, num_examples_per_instance=-1)
    cutoff_labels["time"] = pd.to_numeric(cutoff_labels["time"])
    cutoff_labels.rename(columns={"card_id": "id"}, inplace=True)

    feature_matrix, features = dfs(
        dataframes=dataframes,
        relationships=relationships,
        target_dataframe_name="cards",
        cutoff_time=cutoff_labels,
    )
    feature_matrix = to_pandas(feature_matrix, index="id")

    row_count = len(feature_matrix.index)
    column_count = len(feature_matrix.columns)
    assert row_count == 6
    # One column beyond the generated features is expected -- presumably the
    # label column carried through from the cutoff times (TODO confirm).
    assert column_count == len(features) + 1
def test_accepts_cutoff_time_compose(entities, relationships):
    """Check that ``dfs`` accepts label times from ``cp.LabelMaker`` as cutoff times.

    Dask inputs and outputs are materialised with ``compute()`` so the label
    search and the final assertions operate on computed frames.
    """

    def fraud_occured(df):
        # A window is labelled positive when any of its rows has a truthy 'fraud'.
        return df['fraud'].any()

    maker_options = {
        'target_entity': 'card_id',
        'time_index': 'transaction_time',
        'labeling_function': fraud_occured,
        'window_size': 1,
    }
    label_maker = cp.LabelMaker(**maker_options)

    # The first element of the 'transactions' entry is the dataframe itself.
    transactions = entities['transactions'][0]
    if isinstance(transactions, dd.DataFrame):
        transactions = transactions.compute()

    cutoff_labels = label_maker.search(transactions, num_examples_per_instance=-1)
    cutoff_labels['time'] = pd.to_numeric(cutoff_labels['time'])
    cutoff_labels.rename(columns={'card_id': 'id'}, inplace=True)

    feature_matrix, features = dfs(
        entities=entities,
        relationships=relationships,
        target_entity="cards",
        cutoff_time=cutoff_labels,
    )
    if isinstance(feature_matrix, dd.DataFrame):
        # Only the dask result is re-indexed on 'id' after being computed.
        computed = feature_matrix.compute()
        feature_matrix = computed.set_index('id')

    row_count = len(feature_matrix.index)
    column_count = len(feature_matrix.columns)
    assert row_count == 6
    # One column beyond the generated features is expected -- presumably the
    # label column carried through from the cutoff times (TODO confirm).
    assert column_count == len(features) + 1
def lt(es):
    """Build label times from the ``'log'`` entity of ``es``.

    A label is true when the sum of ``'value'`` within a window exceeds 10.
    The ``'cutoff_time'`` column of the search result is renamed to ``'time'``
    before the labels are returned.
    """

    def label_func(df):
        return df['value'].sum() > 10

    label_maker = cp.LabelMaker(
        target_entity='id',
        time_index='datetime',
        labeling_function=label_func,
        window_size='1m',
    )

    log_frame = to_pandas(es['log'].df)
    found = label_maker.search(log_frame, num_examples_per_instance=-1)
    return found.rename(columns={'cutoff_time': 'time'})
def lt(es):
    """Build label times from the ``"log"`` dataframe of ``es``.

    A label is true when the sum of ``"value"`` within a window exceeds 10.
    The ``"cutoff_time"`` column of the search result is renamed to ``"time"``
    before the labels are returned.
    """

    def label_func(df):
        return df["value"].sum() > 10

    maker_options = {
        "target_dataframe_name": "id",
        "time_index": "datetime",
        "labeling_function": label_func,
        "window_size": "1m",
    }
    label_maker = cp.LabelMaker(**maker_options)

    log_frame = to_pandas(es["log"])
    found = label_maker.search(log_frame, num_examples_per_instance=-1)
    return found.rename(columns={"cutoff_time": "time"})
def total_spent(transactions, total_spent_fn):
    """Search ``transactions`` for labels computed by ``total_spent_fn``.

    The label maker targets ``'customer_id'`` with ``'time'`` as the time
    index, and the search is run non-verbosely with
    ``num_examples_per_instance=1``.

    Returns:
        The result of ``LabelMaker.search`` on ``transactions``.
    """
    label_maker = cp.LabelMaker(
        target_entity='customer_id',
        time_index='time',
        labeling_function=total_spent_fn,
    )
    return label_maker.search(
        transactions,
        num_examples_per_instance=1,
        verbose=False,
    )