# --- Example 1: train an implicit-feedback ALS model and compare its ROC
# curve against a popularity baseline. ---
# train is a triple of parallel arrays (user_indices, item_indices,
# quantities); build a users x items CSR matrix of purchase quantities.
train_sparse = sparse.csr_matrix((train[2], (train[0], train[1])),
                                     shape=(len(customers), len(products)))

# io_time is a Timer context entered before this chunk — TODO confirm.
print("IO done in %f" % io_time.interval)

# Scale the raw counts by alpha before factorization (confidence weighting
# for implicit feedback).
alpha = 15
with Timer() as cython_als_t:
    user_vecs, item_vecs = implicit.alternating_least_squares(
        (train_sparse * alpha).astype('double'),
        factors=64,
        regularization=0.1,
        iterations=10,
        use_gpu=False)
print(f"Time spent in implicit: {cython_als_t.interval}")

# ROC evaluation over the held-out test triples.  NOTE(review): threshold=3.0
# presumably marks a test interaction as relevant when its quantity clears
# 3.0 — verify against Evaluator's definition.
evaluator = Evaluator(test[0], test[1], test[2], threshold=3.0)
# Baseline scorer ignores the user and predicts from the item alone
# (built from the training item indices and quantities).
baseline_model = BaselinePredictor(train[1], train[2])
baseline_fpr, baseline_tpr, baseline_roc = evaluator.roc(
    lambda user, item: baseline_model.pred(item))

# ALS score: dot product of the user's and item's latent-factor vectors.
fpr, tpr, roc = evaluator.roc(
    lambda user, item: np.sum(user_vecs[user, :] * item_vecs[item, :]))
print("AUC: %f" % roc)

# Overlay both ROC curves for visual comparison.
plt.clf()
plt.plot(baseline_fpr, baseline_tpr, label='baseline')
plt.plot(fpr, tpr, label='als')
plt.xlabel('False positive')
plt.ylabel('True positive')
plt.legend()
plt.show()
# --- Beispiel #2 (Example #2): second variant of the same ALS pipeline ---
    grouped_purchased.CustomerID.unique()))  # Get our unique customers
products = list(grouped_purchased.StockCode.unique()
                )  # Get our unique products that were purchased
quantity = list(grouped_purchased.Quantity)  # All of our purchases

rows = grouped_purchased.CustomerID.astype(
    pd.CategoricalDtype(categories=customers, ordered=True)).cat.codes
# Get the associated row indices
cols = grouped_purchased.StockCode.astype(
    pd.CategoricalDtype(categories=products, ordered=True)).cat.codes

# Split the (row, col, quantity) triples into train/test partitions.
# Presumably each of train/test is (user_indices, item_indices, quantities)
# — verify against train_test_split's return convention.
train, test = train_test_split(rows.values, cols.values, quantity)

# ROC evaluation on the held-out triples (default relevance threshold).
evaluator = Evaluator(test[0], test[1], test[2])
# Baseline scorer ignores the user and predicts from the item alone
# (built from the training item indices and quantities).
baseline_model = BaselinePredictor(train[1], train[2])
baseline_fpr, baseline_tpr, baseline_roc = evaluator.roc(
    lambda user, item: baseline_model.pred(item))

# users x items CSR matrix of purchase quantities for the training split.
train_sparse = sparse.csr_matrix((train[2], (train[0], train[1])),
                                 shape=(len(customers), len(products)))

# Scale raw counts by alpha (implicit-feedback confidence weighting) and
# factorize with ALS; this variant uses 32 factors and 50 iterations.
alpha = 15
with Timer() as cython_als_t:
    user_vecs, item_vecs = implicit.alternating_least_squares(
        (train_sparse * alpha).astype('double'),
        factors=32,
        regularization=0.1,
        iterations=50)
print(f"Time spent in implicit: {cython_als_t.interval}")

# Predicted preference = dot product of the user's and item's latent factors.
svd_predictor = lambda user, item: np.sum(user_vecs[user, :] * item_vecs[
    item, :])