예제 #1
0
def test_known_output_step_1():
    """With step_size=1, to_sequence emits every left-padded suffix window."""

    # Five interactions for one user (id 0): items 1..5 in timestamp order.
    interactions = Interactions(np.zeros(5),
                                np.arange(5) + 1,
                                timestamps=np.arange(5))
    sequences = interactions.to_sequence(max_sequence_length=5,
                                         step_size=1).sequences

    # Each row steps back one interaction; short windows are zero-padded.
    expected = np.array([[1, 2, 3, 4, 5],
                         [0, 1, 2, 3, 4],
                         [0, 0, 1, 2, 3],
                         [0, 0, 0, 1, 2],
                         [0, 0, 0, 0, 1]])

    assert (sequences == expected).all()
예제 #2
0
def test_known_output_step_2():
    """With step_size=2, to_sequence emits every second suffix window."""

    # Same single-user dataset as the step-1 test: items 1..5 in time order.
    interactions = Interactions(np.zeros(5),
                                np.arange(5) + 1,
                                timestamps=np.arange(5))
    sequences = interactions.to_sequence(max_sequence_length=5,
                                         step_size=2).sequences

    # Rows step back two interactions at a time, zero-padded on the left.
    expected = np.array([[1, 2, 3, 4, 5],
                         [0, 0, 1, 2, 3],
                         [0, 0, 0, 0, 1]])

    assert (sequences == expected).all()
def individual_predictions(df, model):
    """Score all items for each user from one of that user's sequences.

    Returns an array of shape (num_users, num_items + 1); row ``u`` holds the
    model's item scores for user ``u`` (the extra column 0 corresponds to the
    padding item).
    """
    n_users = len(df['user_id'].unique())
    n_items = len(df['item_id'].unique())
    predictions = np.zeros(shape=(n_users, n_items + 1))

    dataset = Interactions(user_ids=np.array(df['user_id'], dtype='int32'),
                           item_ids=np.array(df['item_id'], dtype='int32'),
                           timestamps=df['entry_at'])
    sequences = dataset.to_sequence(max_sequence_length=15)

    # NOTE(review): only the first sequence encountered for each user is
    # scored, and this assumes sequence user ids appear in ascending
    # 0, 1, 2, ... order with no gaps — verify against to_sequence's ordering.
    next_user = 0
    for user, seq in zip(sequences.user_ids, sequences.sequences):
        if user == next_user:
            predictions[user] = model.predict(seq)
            next_user += 1

    return predictions
 def interactions_to_sequence(f_train: Interactions, f_test: Interactions):
     """Return the sequence representations of the train and test sets."""
     return f_train.to_sequence(), f_test.to_sequence()
예제 #5
0
# Build the implicit-feedback Interactions object from the training frame.
# The two positional arguments after user/item indices are presumably the
# rating signal (pct_cvt) and the timestamp (latest_watch_time) — TODO
# confirm against the Interactions signature in use.
interactions = Interactions(train_data["uindex"].to_numpy(),
                            train_data["vindex"].to_numpy(),
                            train_data["pct_cvt"].to_numpy(),
                            train_data["latest_watch_time"].to_numpy(),
                            num_users=len(original_train_data["uindex"].unique()),
                            num_items=num_items)


# if "1500K" in suffix:
#     logger.info("Increasing step size and max_sequence_length")
#     step_size = 2
#     min_sequence_length = 2
#     max_sequence_length = 50

# Convert to fixed-length training sequences; the length/step settings come
# from variables defined earlier in the script.
train_seq = interactions.to_sequence(max_sequence_length=max_sequence_length,
                                     min_sequence_length=min_sequence_length,
                                     step_size=step_size)

logger.info("Data is loaded and converted to sequences..")

# TensorBoard writer: one run directory per model alias, recording the alias
# and hyperparameter string as text summaries at step 0.
writer = SummaryWriter(log_dir='{}/{}'.format(tensorboard_base_dir, model_alias))
writer.add_text('alias', model_alias, 0)
writer.add_text('hyperparameters', str(h), 0)

def notify_loss_completion(epoch_id, batch_id, loss, net, model):
    """Training callback: record the batch loss to TensorBoard and the log.

    ``net`` and ``model`` are part of the callback interface and are unused
    here.
    """
    writer.add_scalar("Batch/loss", loss, batch_id)
    # Use the module-level ``logger`` (as the rest of this script does)
    # instead of the root logger via ``logging.info``.
    logger.info('[Epoch {}] Batch {}, Loss {}'.format(epoch_id, batch_id, loss))

예제 #6
0
# Shift all ids up by one — presumably so that id 0 is free to serve as the
# padding value in the sequence representation; confirm against how the
# downstream model treats index 0.
sales_categorical['user_id'] = sales_categorical['user_id'] + 1
sales_categorical['product_id'] = sales_categorical['product_id'] + 1
sales_categorical['timestep_id'] = sales_categorical['timestep_id'] + 1

#%%

from spotlight.interactions import Interactions
from spotlight.sequence.implicit import ImplicitSequenceModel

# Implicit-feedback interactions: user/product ids, with timestep_id acting
# as the timestamp that orders each user's purchases.
implicit_interactions = Interactions(
    sales_categorical['user_id'].astype('int32').values,
    sales_categorical['product_id'].astype('int32').values,
    timestamps=sales_categorical['timestep_id'].astype('int32').values)

sequential_interaction = implicit_interactions.to_sequence()

# NOTE(review): this default-constructed model is immediately replaced by
# the configured one in the next cell and is never used.
implicit_sequence_model = ImplicitSequenceModel()

#%%
# Fit an LSTM-based sequence model on GPU and print the wall-clock fit time.
start = datetime.now()
implicit_sequence_model = ImplicitSequenceModel(embedding_dim=100,
                                                representation='lstm',
                                                n_iter=5,
                                                use_cuda=True)
implicit_sequence_model.fit(sequential_interaction)
print(datetime.now() - start)

#%%

prediction = pd.DataFrame(implicit_sequence_model.predict([1337],
예제 #7
0
# create train models


from spotlight.sequence.implicit import ImplicitSequenceModel
# One baseline model plus one model per "preserving N percent" training split.
model = ImplicitSequenceModel(embedding_dim=128)
preserving_25_percent_model = ImplicitSequenceModel(embedding_dim=128)
preserving_50_percent_model = ImplicitSequenceModel(embedding_dim=128)
preserving_75_percent_model = ImplicitSequenceModel(embedding_dim=128)


# fit models


# Each model is fitted on the sequence form of its corresponding split.
model.fit(train.to_sequence(), verbose=True)
preserving_25_percent_model.fit(preserving_25_percent_train.to_sequence(), verbose=True)
preserving_50_percent_model.fit(preserving_50_percent_train.to_sequence(), verbose=True)
preserving_75_percent_model.fit(preserving_75_percent_train.to_sequence(), verbose=True)

import torch
# NOTE(review): the baseline ``model`` is not persisted here — only the
# three "preserving" variants are saved.
torch.save(preserving_25_percent_model, './preserving_25_percent_model.model')
torch.save(preserving_50_percent_model, './preserving_50_percent_model.model')
torch.save(preserving_75_percent_model, './preserving_75_percent_model.model')
# result evaluation

from spotlight.evaluation import mrr_score
# MRR is computed on the training interactions themselves, not a held-out
# set — these are train-set scores.
train_mrrs = mrr_score(model, train)
preserving_25_train_mrrs = mrr_score(preserving_25_percent_model, preserving_25_percent_train)
preserving_50_train_mrrs = mrr_score(preserving_50_percent_model, preserving_50_percent_train)
preserving_75_train_mrrs = mrr_score(preserving_75_percent_model, preserving_75_percent_train)
예제 #8
0
def main(args):
    """End-to-end training entry point.

    Loads the rating data, builds train/test/full sequence datasets with a
    single set of sequence parameters, trains an ImplicitSequenceModel,
    optionally saves/loads it, evaluates MAP, and runs prediction.
    """
    df_rate, user_ids, neg_prob = get_data(args)

    # Flip to True to restore a previously saved model from args.model_path.
    load_model = False

    print("seq_len {}, epoch {}".format(args.seq_len, args.epoch))

    # Implicit-feedback dataset; norm_cnt supplies per-interaction weights.
    # (Backslash continuations removed — parentheses already continue lines.)
    dataset = Interactions(df_rate.mapped_userid.values.astype("int32"),
                           df_rate.foodid.values.astype("int32"),
                           timestamps=df_rate.timestamp.values.astype("int32"),
                           weights=df_rate.norm_cnt.values.astype("float32"))

    train, test = timestamp_based_train_test_split(dataset,
                                                   test_percentage=0.2)

    def _to_sequence(interactions):
        # Single place for the sequence settings so train/test/full datasets
        # are always converted identically.
        return interactions.to_sequence(max_sequence_length=args.seq_len,
                                        min_sequence_length=args.min_seq_len,
                                        step_size=args.step_size,
                                        mode=args.mode)

    train = _to_sequence(train)
    test = _to_sequence(test)
    dataset = _to_sequence(dataset)

    model = ImplicitSequenceModel(
        loss=args.loss,
        representation=args.representation,
        embedding_dim=args.embedding_dim,
        n_iter=args.epoch,
        batch_size=256,
        l2=0.0,
        learning_rate=0.001,
        optimizer_func=None,
        use_cuda=True,
        sparse=False,
        random_state=None,
        num_negative_samples=args.num_negative_samples,
        test_data=test,
        neg_prob=neg_prob)

    print("train.shape", train.sequences.shape)
    print("test.shape", test.sequences.shape)
    print("Fitting model")
    # NOTE(review): fitting on the full dataset five times and then on the
    # test split trains on test interactions — confirm this is intentional.
    for _ in range(5):
        model.fit(dataset,
                  verbose=True,
                  calc_map=args.calc_map,
                  neg_mode=args.neg_mode)
    model.fit(test,
              verbose=True,
              calc_map=args.calc_map,
              neg_mode=args.neg_mode)

    if args.save_model:
        torch.save(model, args.model_path)
    if load_model:
        model = torch.load(args.model_path)

    if not args.calc_map:
        ap = NewAveragePrecision(model, test, k=20)
        print("map: ", ap.mean())

    predict(args, model, df_rate, neg_prob)