def test_get_normalized_scores(header):
    """Compare normalized SAR scores against precomputed 2x7 matrices.

    A ``SARSingleNode`` built with ``timedecay_formula=True`` and
    ``normalize=True`` is fitted on a two-user training frame, then scored
    on a test frame twice: once with ``remove_seen=True`` (the reference
    matrix holds ``-inf`` in the masked positions) and once with the
    default arguments.

    Args:
        header (dict): Fixture mapping ``col_user``, ``col_item``,
            ``col_rating`` and ``col_timestamp`` to column names; it is
            also forwarded to the model as keyword arguments.
    """
    user_col = header["col_user"]
    item_col = header["col_item"]
    rating_col = header["col_rating"]

    train_df = pd.DataFrame({
        user_col: [1, 1, 1, 1, 2, 2, 2, 2],
        item_col: [1, 2, 3, 4, 1, 5, 6, 7],
        rating_col: [3.0, 4.0, 5.0, 4.0, 3.0, 2.0, 1.0, 5.0],
        header["col_timestamp"]: [1, 20, 30, 400, 50, 60, 70, 800],
    })
    test_df = pd.DataFrame({
        user_col: [1, 1, 1, 2, 2, 2],
        item_col: [5, 6, 7, 2, 3, 4],
        rating_col: [2.0, 1.0, 5.0, 3.0, 4.0, 5.0],
    })

    def _check_scores(scores, reference):
        # Same three checks, in the same order, for both scoring modes.
        assert scores.shape == (2, 7)
        assert isinstance(scores, np.ndarray)
        assert np.isclose(reference, np.asarray(scores)).all()

    sar = SARSingleNode(**header, timedecay_formula=True, normalize=True)
    sar.fit(train_df)

    # Scores with remove_seen=True: masked entries are expected to be -inf.
    masked = -np.inf
    kept = 1.23512374
    unseen_scores = sar.score(test_df, remove_seen=True)
    unseen_reference = np.array([
        [masked] * 4 + [kept] * 3,
        [masked] + [kept] * 3 + [masked] * 3,
    ])
    _check_scores(unseen_scores, unseen_reference)

    # Scores with default arguments: every entry is a finite value.
    all_scores = sar.score(test_df)
    all_reference = np.array([
        [3.11754872] + [4.29408577] * 3 + [1.23512374] * 3,
        [2.5293308] + [1.23511758] * 3 + [3.11767458] * 3,
    ])
    _check_scores(all_scores, all_reference)
Example #2
0
def test_get_item_based_topk(header, pandas_dummy):
    """Exercise ``get_item_based_topk`` with three kinds of input frames.

    The model is fitted on the ``pandas_dummy`` fixture and queried with
    (1) items only, (2) items plus users, and (3) items, users and
    ratings. Each result is compared with a hand-written expected frame.

    Args:
        header (dict): Fixture with the column-name mapping, also passed
            to ``SARSingleNode`` as keyword arguments.
        pandas_dummy: Fixture holding the training data given to ``fit``.
    """
    user_col = header["col_user"]
    item_col = header["col_item"]

    model = SARSingleNode(**header)
    model.fit(pandas_dummy)

    # Case 1: only item ids are supplied.
    want_items_only = pd.DataFrame(
        {
            "UserId": [0, 0, 0],
            "MovieId": [8, 7, 6],
            "prediction": [2.0, 2.0, 2.0],
        }
    )
    query = pd.DataFrame({item_col: [1, 5, 10]})
    got = model.get_item_based_topk(query, top_k=3)
    assert_frame_equal(want_items_only, got, check_dtype=False)

    # Case 2: item ids paired with user ids.
    want_with_users = pd.DataFrame(
        {
            "UserId": [100, 100, 100, 1, 1, 1],
            "MovieId": [8, 7, 6, 4, 3, 10],
            "prediction": [2.0, 2.0, 2.0, 2.0, 2.0, 1.0],
        }
    )
    query = pd.DataFrame(
        {
            user_col: [100, 100, 1, 100, 1, 1],
            item_col: [1, 5, 1, 10, 2, 6],
        }
    )
    got = model.get_item_based_topk(query, top_k=3, sort_top_k=True)
    assert_frame_equal(want_with_users, got, check_dtype=False)

    # Case 3: item ids, user ids and ratings. Both frames are indexed by
    # (user, item) and compared with check_like=True so that row/column
    # ordering does not matter.
    index_cols = ["UserId", "MovieId"]
    want_with_ratings = pd.DataFrame(
        {
            "UserId": [100, 100, 100, 1, 1, 1],
            "MovieId": [2, 4, 3, 4, 3, 10],
            "prediction": [5.0, 5.0, 5.0, 8.0, 8.0, 4.0],
        }
    ).set_index(index_cols)
    query = pd.DataFrame(
        {
            user_col: [100, 100, 1, 100, 1, 1],
            item_col: [1, 5, 1, 10, 2, 6],
            header["col_rating"]: [5, 1, 3, 1, 5, 4],
        }
    )
    got = model.get_item_based_topk(query, top_k=3).set_index(index_cols)
    assert_frame_equal(want_with_ratings, got, check_like=True)
Example #3
0
def test_fit(similarity_type, timedecay_formula, train_test_dummy_timestamp, header):
    """Smoke-test that ``fit`` runs for the given similarity/time-decay setup.

    Args:
        similarity_type: Value forwarded as the model's ``similarity_type``.
        timedecay_formula: Value forwarded as the model's ``timedecay_formula``.
        train_test_dummy_timestamp: Fixture yielding a (train, test) pair;
            only the training part is used here.
        header (dict): Column-name mapping passed to the model as keywords.
    """
    sar = SARSingleNode(
        **header,
        similarity_type=similarity_type,
        timedecay_formula=timedecay_formula,
    )
    train_df, _ = train_test_dummy_timestamp
    sar.fit(train_df)
Example #4
0
# Split `data` into train and test sets; 0.75 is the ratio argument
# (presumably the training fraction -- confirm against python_random_split).
train, test = python_random_split(data, 0.75)

# Column-name mapping forwarded to SARSingleNode as keyword arguments.
# NOTE(review): the "col_user" value "******" looks like a redacted or masked
# column name -- confirm the real user column before running.
header = {
    "col_user": "******",
    "col_item": "MovieId",
    "col_rating": "Rating",
    "col_timestamp": "Timestamp",
}

# Configure root logging at DEBUG level with "<time> <level> <message>" lines.
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s %(levelname)-8s %(message)s')

# Instantiate SAR with Jaccard similarity and the time-decay formula enabled
# (coefficient 30; units are not visible here -- TODO confirm).
model = SARSingleNode(remove_seen=True,
                      similarity_type="jaccard",
                      time_decay_coefficient=30,
                      time_now=None,
                      timedecay_formula=True,
                      **header)

# Train the SAR model, measuring wall-clock time around fit().
start_time = time.time()

model.fit(train)

# Elapsed training time in seconds, reported through `run.log`.
train_time = time.time() - start_time
run.log(name="Training time", value=train_time)

# Restart the timer before generating recommendations; the matching
# elapsed-time computation is not visible in this excerpt.
start_time = time.time()

top_k = model.recommend_k_items(test)
Example #5
0
def train_sar(params, data):
    """Create a ``SARSingleNode``, index it on ``data`` and time its fit.

    Args:
        params: Mapping of keyword arguments forwarded to ``SARSingleNode``.
        data: Dataset passed to both ``set_index`` and ``fit``.

    Returns:
        tuple: ``(model, timer)`` where ``timer`` is the object bound by
        the ``Timer()`` context that wrapped the ``fit`` call.
    """
    sar = SARSingleNode(**params)
    sar.set_index(data)
    # Only the fit call is inside the timed region.
    with Timer() as fit_timer:
        sar.fit(data)
    return sar, fit_timer
def train_sar(params, data):
    """Fit a SAR model built from ``params`` and report how long it took.

    Args:
        params: Mapping unpacked into the ``SARSingleNode`` constructor.
        data: Dataset used for ``set_index`` and then for ``fit``.

    Returns:
        tuple: The fitted model followed by the value bound by the
        ``Timer()`` context manager around ``fit``.
    """
    recommender = SARSingleNode(**params)
    recommender.set_index(data)
    # Indexing happens before the timer starts; only fit() is measured.
    with Timer() as elapsed:
        recommender.fit(data)
    return recommender, elapsed