Example No. 1
0
def test_sparse_to_df(test_specs, python_dataset):
    """Round-trip test: ``map_back_sparse`` must invert ``gen_affinity_matrix``.

    Converts the input ratings dataframe to its sparse affinity-matrix
    representation and back, then checks that the reconstructed dataframe
    contains the same (user, item, rating) triples as the original.
    """
    # Column-name mapping consumed by the affinity matrix.
    header = {
        "col_user": DEFAULT_USER_COL,
        "col_item": DEFAULT_ITEM_COL,
        "col_rating": DEFAULT_RATING_COL,
    }

    # instantiate the affinity matrix
    am = AffinityMatrix(DF=python_dataset, **header)

    # generate the sparse matrix representation
    X, _, _ = am.gen_affinity_matrix()

    # use the inverse function to generate a pandas df from the sparse matrix
    DF = am.map_back_sparse(X, kind="ratings")

    # BUG FIX: the original asserted `a.values.all() == b.values.all()`,
    # which collapses each side to a single truthiness boolean first, so the
    # test passed even when the two dataframes differed. Sort both frames
    # into a canonical order and compare element-wise instead.
    result = DF.sort_values(by=["userID", "itemID"])
    expected = python_dataset.sort_values(by=["userID", "itemID"])

    assert (result.userID.values == expected.userID.values).all()
    assert (result.itemID.values == expected.itemID.values).all()
    assert (result.rating.values == expected.rating.values).all()
Example No. 2
0
def test_df_to_sparse(test_specs, python_dataset):
    """The affinity matrix must be (unique users) x (unique items) in shape."""
    # Column-name mapping consumed by the affinity matrix.
    header = {
        "col_user": DEFAULT_USER_COL,
        "col_item": DEFAULT_ITEM_COL,
        "col_rating": DEFAULT_RATING_COL,
    }

    # Build the sparse representation of the input dataframe.
    am = AffinityMatrix(DF=python_dataset, **header)
    X, _, _ = am.gen_affinity_matrix()

    # One row per distinct user, one column per distinct item.
    n_users = python_dataset.userID.unique().shape[0]
    n_items = python_dataset.itemID.unique().shape[0]
    assert (X.shape[0] == n_users) & (X.shape[1] == n_items)
def RBMtrain():
    """Train an RBM recommender on the snacks ratings CSV and persist the
    top-k predictions.

    Side effects: reads ``SnacksData100.csv`` from the working directory and
    dumps the top-k recommendation dataframe to the file ``testdata`` via
    ``joblib``.
    """
    data = pd.read_csv("SnacksData100.csv")
    # NOTE(review): the user column name looks scrubbed ("******") — restore
    # the real column name from the CSV before running.
    header = {
        "col_user": "******",
        "col_item": "Product_Id",
        "col_rating": "Ratings",
    }
    am = AffinityMatrix(DF=data, **header)
    # BUG FIX: gen_affinity_matrix() returns a 3-tuple (matrix, user map,
    # item map) — see the other examples in this file. The original bound the
    # whole tuple to X, which breaks numpy_stratified_split.
    X, _, _ = am.gen_affinity_matrix()
    Xtr, Xtst = numpy_stratified_split(X)
    model = RBM(hidden_units=600,
                training_epoch=30,
                minibatch_size=60,
                keep_prob=0.9,
                with_metrics=True)
    model.fit(Xtr, Xtst)
    top_k, test_time = model.recommend_k_items(Xtst)
    top_k_df = am.map_back_sparse(top_k, kind='prediction')
    # Held-out ratings mapped back for evaluation against the predictions.
    test_df = am.map_back_sparse(Xtst, kind='ratings')
    joblib.dump(top_k_df, 'testdata')
Example No. 4
0
# Print a readable random sample of the ratings joined with the movie titles.
smpl = pd.merge(data, titles, on="MovieID").sample(SMPLS)
smpl['MovieTitle'] = smpl['MovieTitle'].str[:TITLEN]  # truncate long titles
smpl['Rating'] = pd.to_numeric(smpl['Rating'], downcast='integer')
del smpl['Timestamp']  # Drop the column from printing.
print(smpl.to_string())

# Column-name mapping consumed by the affinity matrix.
# NOTE(review): the user column name looks scrubbed ("******") — restore the
# real column name before running.
header = {
    "col_user": "******",
    "col_item": "MovieID",
    "col_rating": "Rating",
}

# Use a sparse matrix representation rather than a pandas data frame
# for significant performance gain.

am = AffinityMatrix(DF=data, **header)
# BUG FIX: gen_affinity_matrix() returns a 3-tuple (matrix, user map,
# item map) — see the other examples in this file. The original bound the
# whole tuple to X, which breaks numpy_stratified_split.
X, _, _ = am.gen_affinity_matrix()

# Construct the training and test datasets.

Xtr, Xtst = numpy_stratified_split(X)

print('\nTraining matrix size (users, movies) is:', Xtr.shape)
print('Testing matrix size is: ', Xtst.shape)

# Initialize the model class. Note that through random variation we
# can get a much better performing model with seed=1!

model = RBM(
    hidden_units=600,
    training_epoch=30,
Example No. 5
0
# Load the ratings data. NOTE(review): the variable is named
# "electronics_data" but the CSV is snack-bar data — presumably copied from
# an electronics example; the name is kept for compatibility.
electronics_data = pd.read_csv('SnackBars.csv')

# Convert to 32-bit in order to reduce memory consumption
electronics_data.loc[:, 'Rating'] = electronics_data['Rating'].astype(np.int32)
electronics_data.loc[:, 'Price'] = electronics_data['Price'].astype(np.int32)

# Column-name mapping consumed by the affinity matrix.
# NOTE(review): the user column name looks scrubbed ("******") — restore the
# real column name before running.
header = {
    "col_user": "******",
    "col_item": "Snack Subscription ID",
    "col_rating": "Rating",
    #"col_rating": "Price",
}

# instantiate the sparse matrix generation
am = AffinityMatrix(DF=electronics_data, **header)

# obtain the sparse matrix
# BUG FIX: gen_affinity_matrix() returns a 3-tuple (matrix, user map,
# item map) — see the other examples in this file. The original bound the
# whole tuple to X, which breaks numpy_stratified_split.
X, _, _ = am.gen_affinity_matrix()

#df_train.to_csv ('Trained_output.csv', index = False, header=True)

# Stratified split into train/test matrices of identical shape.
Xtr, Xtst = numpy_stratified_split(X)

# Hyper-parameters chosen interactively via Streamlit sliders.
selection = st.slider('Select a range of epoch', 10, 100)
HiddenUnits = st.slider('Select the number of hidden layers', 100, 600)

# First we initialize the model class
model = RBM(hidden_units=HiddenUnits,
            training_epoch=selection,
            minibatch_size=60,
            keep_prob=0.9)