# Example #1
def test_sparse_to_df(test_specs, python_dataset):
    """Round-trip test: map_back_sparse must invert gen_affinity_matrix.

    Builds the sparse affinity matrix from ``python_dataset``, maps it back
    to a DataFrame, and checks that the result matches the original data
    (sorted by user id, the order map_back_sparse emits).
    """
    # column-name mapping consumed by AffinityMatrix
    header = {
        "col_user": DEFAULT_USER_COL,
        "col_item": DEFAULT_ITEM_COL,
        "col_rating": DEFAULT_RATING_COL,
    }

    # instantiate the affinity matrix
    am = AffinityMatrix(DF=python_dataset, **header)

    # generate the sparse matrix representation
    X, _, _ = am.gen_affinity_matrix()

    # use the inverse function to generate a pandas df from a sparse matrix ordered by userID
    DF = am.map_back_sparse(X, kind="ratings")

    # BUG FIX: the original asserted `a.values.all() == b.values.all()`,
    # which reduces each side to a single boolean (truthiness of every
    # element) before comparing — it passes for almost any data and never
    # checked positions. Compare the arrays element-wise instead.
    expected = python_dataset.sort_values(by=["userID"])

    assert (DF.userID.values == expected.userID.values).all()
    assert (DF.itemID.values == expected.itemID.values).all()
    assert (DF.rating.values == expected.rating.values).all()
# Example #2
def test_sparse_to_df(test_specs, python_dataset):
    """Round-trip test: map_back_sparse must invert gen_affinity_matrix.

    Builds the sparse affinity matrix from ``python_dataset``, maps it back
    to a DataFrame, and checks that the result matches the original data
    (sorted by user id, the order map_back_sparse emits).
    """
    # column-name mapping consumed by AffinityMatrix
    header = {
        "col_user": DEFAULT_USER_COL,
        "col_item": DEFAULT_ITEM_COL,
        "col_rating": DEFAULT_RATING_COL,
    }

    # instantiate the affinity matrix
    am = AffinityMatrix(DF=python_dataset, **header)

    # generate the sparse matrix representation
    X = am.gen_affinity_matrix()

    # use the inverse function to generate a pandas df from a sparse matrix ordered by userID
    DF = am.map_back_sparse(X, kind="ratings")

    # BUG FIX: the original asserted `a.values.all() == b.values.all()`,
    # which reduces each side to a single boolean (truthiness of every
    # element) before comparing — it passes for almost any data and never
    # checked positions. Compare the arrays element-wise instead.
    expected = python_dataset.sort_values(by=["userID"])

    assert (DF.userID.values == expected.userID.values).all()
    assert (DF.itemID.values == expected.itemID.values).all()
    assert (DF.rating.values == expected.rating.values).all()
def RBMtrain():
    """Train an RBM recommender on the snacks ratings file and persist predictions.

    Reads ``SnacksData100.csv``, builds the user/item affinity matrix,
    trains an RBM on a stratified split, maps the top-k recommendations
    back to original ids, and dumps them to the file ``testdata``.

    Returns:
        pandas.DataFrame: the top-k recommendations that were persisted.
    """
    data = pd.read_csv("SnacksData100.csv")
    header = {
        "col_user": "******",  # NOTE(review): column name redacted — restore the real user-id column before use
        "col_item": "Product_Id",
        "col_rating": "Ratings",
    }
    am = AffinityMatrix(DF=data, **header)
    X = am.gen_affinity_matrix()

    # stratified split so every user appears in both train and test
    Xtr, Xtst = numpy_stratified_split(X)

    model = RBM(
        hidden_units=600,
        training_epoch=30,
        minibatch_size=60,
        keep_prob=0.9,
        with_metrics=True,
    )
    model.fit(Xtr, Xtst)

    top_k, test_time = model.recommend_k_items(Xtst)
    top_k_df = am.map_back_sparse(top_k, kind='prediction')
    # BUG FIX: the original also computed `test_df = am.map_back_sparse(Xtst,
    # kind='ratings')` and never used it — dead work, removed.

    joblib.dump(top_k_df, 'testdata')
    return top_k_df
# Example #4
We will now fit the model and apply it to the dataset
""")

# Pause for user interaction before the training step (mlask is defined
# earlier in this script — presumably a "press any key" prompt; confirm).
mlask(begin="\n")

# TODO(review): find a way to suppress the verbose console output that
# model.fit produces during training.
train_time = model.fit(Xtr, Xtst)

# TODO(review): recommend_k_items is similarly noisy — silence it too.
top_k, test_time = model.recommend_k_items(Xtst)

# Map the internal matrix indices back to the original user/item ids.

top_k_df = am.map_back_sparse(top_k, kind='prediction')
test_df = am.map_back_sparse(Xtst, kind='ratings')

# Peek at the first recommendations (return value is discarded here; only
# useful when this script runs in an interactive/REPL context).
top_k_df.head(10)


def ranking_metrics(data_size, data_true, data_pred, time_train, time_test, K):

    eval_map = map_at_k(data_true,
                        data_pred,
                        col_user="******",
                        col_item="MovieID",
                        col_rating="Rating",
                        col_prediction="prediction",
                        relevancy_method="top_k",
                        k=K)