Ejemplo n.º 1
0
def test_recommend_k_items(
    threshold, similarity_type, file, header, sar_settings, demo_usage_data
):
    """Check SAR top-10 recommendations for the test user against references.

    A ``SARSingleNode`` model with the time-decay formula enabled is fitted
    on ``demo_usage_data``. The items and scores it recommends for
    ``sar_settings["TEST_USER_ID"]`` must match the pair returned by
    ``load_userpred`` for the corresponding ``userpred_*_userid_only.csv``
    reference file.

    Args:
        threshold: Forwarded to the model as ``threshold`` and embedded (via
            ``str``) in the reference file name.
        similarity_type: Forwarded to the model as ``similarity_type``.
        file: String fragment of the reference file name, placed between
            ``"userpred_"`` and the threshold.
        header: Mapping providing at least ``"col_user"``, ``"col_item"`` and
            ``"col_timestamp"``; it is also expanded into the model's
            keyword arguments.
        sar_settings: Mapping providing ``"FILE_DIR"``, ``"TEST_USER_ID"``
            and ``"ATOL"``.
        demo_usage_data: Usage data the model is fitted on; indexed by column
            name and by boolean mask (presumably a pandas DataFrame).
    """
    # Anchor the time decay at the most recent timestamp present in the data.
    latest_timestamp = demo_usage_data[header["col_timestamp"]].max()
    model_options = dict(
        similarity_type=similarity_type,
        timedecay_formula=True,
        time_decay_coefficient=30,
        time_now=latest_timestamp,
        threshold=threshold,
    )
    sar = SARSingleNode(**model_options, **header)
    sar.fit(demo_usage_data)

    # Reference items/scores for this file/threshold combination.
    reference_name = "userpred_" + file + str(threshold) + "_userid_only.csv"
    expected_items, expected_scores = load_userpred(
        sar_settings["FILE_DIR"] + reference_name
    )

    # Only the test user's rows are passed to the recommender.
    is_test_user = (
        demo_usage_data[header["col_user"]] == sar_settings["TEST_USER_ID"]
    )
    recommendations = sar.recommend_k_items(
        demo_usage_data[is_test_user],
        top_k=10,
        sort_top_k=True,
        remove_seen=True,
    )
    recommended_items = list(recommendations[header["col_item"]])
    recommended_scores = np.array(recommendations["prediction"])

    # Items must match exactly; scores only within the configured tolerance.
    assert expected_items == recommended_items
    assert np.allclose(
        expected_scores, recommended_scores, atol=sar_settings["ATOL"]
    )
Ejemplo n.º 2
0
    "col_timestamp": "Timestamp",
}

# Configure the SAR recommender: Jaccard item similarity, time-decay formula
# enabled with a coefficient of 30, and the column-name settings taken from
# `header`.
model = SARSingleNode(remove_seen=True,
                      similarity_type="jaccard",
                      time_decay_coefficient=30,
                      time_now=None,
                      timedecay_formula=True,
                      **header)

# Fit on the training split and record the elapsed wall-clock seconds.
start_time = time.time()
model.fit(train)
train_time = time.time() - start_time

# Generate recommendations for the test split and record the elapsed
# wall-clock seconds.
start_time = time.time()
topk = model.recommend_k_items(test)
test_time = time.time() - start_time

# TODO: remove these conversions when the model returns the same types as
# its input. Until then the ID columns are coerced to numeric.

topk['UserId'] = pd.to_numeric(topk['UserId'])
topk['MovieId'] = pd.to_numeric(topk['MovieId'])

# NOTE(review): `mlask` is defined outside this excerpt; presumably it pauses
# for user confirmation before continuing -- confirm.
mlask(begin="\n", end="\n")

mlcat(
    "Fit the SAR Model", """\
We will now fit the model (<1s) and apply it to the test dataset

For a random sample of users from the test dataset we list the model's
prediction of their rating of a particular movie. The predicted ratings 
Ejemplo n.º 3
0
                      time_decay_coefficient=30,
                      time_now=None,
                      timedecay_formula=True,
                      **header)

# Train the SAR model and record the elapsed wall-clock seconds.
start_time = time.time()

model.fit(train)

train_time = time.time() - start_time
# NOTE(review): `run` is defined outside this excerpt; presumably an
# experiment-tracking run object -- confirm.
run.log(name="Training time", value=train_time)

# Generate recommendations for the test split and record the elapsed
# wall-clock seconds.
start_time = time.time()

top_k = model.recommend_k_items(test)

test_time = time.time() - start_time
run.log(name="Prediction time", value=test_time)

# TODO: remove these conversions when the model returns the same types as
# its input. Until then the ID columns are coerced to numeric.
top_k['UserId'] = pd.to_numeric(top_k['UserId'])
top_k['MovieId'] = pd.to_numeric(top_k['MovieId'])

# evaluate
eval_map = map_at_k(test,
                    top_k,
                    col_user="******",
                    col_item="MovieId",
                    col_rating="Rating",
                    col_prediction="prediction",
Ejemplo n.º 4
0
                                          col_item=header["col_item"],
                                          seed=42)

    # Configure root logging at DEBUG level with a timestamped, level-tagged
    # message format.
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(levelname)-8s %(message)s')

    # Configure the SAR recommender: Jaccard item similarity, time-decay
    # formula enabled with a coefficient of 30, and the column-name settings
    # taken from `header`.
    model = SARSingleNode(similarity_type="jaccard",
                          time_decay_coefficient=30,
                          time_now=None,
                          timedecay_formula=True,
                          **header)

    model.fit(train)

    # Recommend items for the test split, excluding items already seen.
    top_k = model.recommend_k_items(test, remove_seen=True)

    # Attach movie titles to the recommendations. The inner join on MovieId
    # drops any recommendation without a matching title row; the result is
    # sorted by UserId and Prediction, both descending.
    top_k_with_titles = (top_k.join(
        data[['MovieId', 'Title']].drop_duplicates().set_index('MovieId'),
        on='MovieId',
        how='inner').sort_values(by=['UserId', 'Prediction'], ascending=False))

    # Positional and keyword arguments bundled once so they can be unpacked
    # into the evaluation call below.
    # NOTE(review): the `col_user` value looks redacted in this excerpt --
    # confirm it names the actual user column.
    args = [test, top_k]
    kwargs = dict(col_user='******',
                  col_item='MovieId',
                  col_rating='Rating',
                  col_prediction='Prediction',
                  relevancy_method='top_k',
                  k=TOP_K)

    eval_map = map_at_k(*args, **kwargs)