Esempio n. 1
0
    def test_class(self):
        """
        Exercise EventScorePredictor on the mini MovieLens data set.

        Checks, in order: the attributes of a freshly constructed
        predictor, the initial/final losses and score levels after
        fitting, single-event predictions, a batch prediction, and the
        batch prediction again with ``use_expectation`` switched off.
        """
        import numpy as np
        from kamrecsys.datasets import load_movielens_mini
        from kamrecsys.tm.plsa_multi import EventScorePredictor

        data = load_movielens_mini()
        tolerance = 1e-5

        rcmdr = EventScorePredictor(tol=1e-8, k=2, random_state=1234)

        # Expected attributes before fitting, assembled by value:
        # attributes that are still None, counters that are still zero,
        # then constructor arguments / defaults and the infinite losses.
        initial_state = dict.fromkeys([
            '_rng', 'iid', 'eid', 'n_objects', 'pZ_', 'pYgZ_', 'pRgZ_',
            'pXgZ_', '_q', 'score_levels_'
        ])
        initial_state.update(
            dict.fromkeys([
                'n_otypes', 'n_score_levels_', 'n_iter_', 'n_events_',
                'n_users_', 'n_items_'
            ], 0))
        initial_state.update({
            'k': 2,
            'tol': 1e-8,
            'maxiter': 100,
            'alpha': 1.0,
            'random_state': 1234,
            'i_loss_': np.inf,
            'f_loss_': np.inf,
            'use_expectation': True
        })
        self.assertDictEqual(vars(rcmdr), initial_state)

        # uncomment to attach a stream handler to the 'kamrecsys' logger
        # import logging
        # logging.getLogger('kamrecsys').addHandler(logging.StreamHandler())
        rcmdr.fit(data)

        self.assertAlmostEqual(rcmdr.i_loss_, 5.41836900049, delta=tolerance)
        self.assertAlmostEqual(rcmdr.f_loss_, 5.17361298499, delta=tolerance)
        assert_allclose(rcmdr.score_levels_, [1, 2, 3, 4, 5], rtol=tolerance)

        # single (user, item) events paired with their reference scores
        single_events = [
            # known user and item
            ((1, 7), 3.64580117249),
            ((1, 9), 3.6587422493),
            ((5, 7), 3.60707987724),
            ((5, 9), 3.62184516985),
            # known user and unknown item
            ((1, 11), 3.66032199689),
            ((5, 12), 3.62387542269),
            # unknown user and known item
            ((3, 7), 3.60821491793),
            ((11, 9), 3.62304301551),
            # unknown user and item
            ((3, 11), 3.62507437787),
        ]
        for event, reference in single_events:
            self.assertAlmostEqual(rcmdr.predict(event),
                                   reference,
                                   delta=tolerance)

        # batch prediction over every pairing of users 1, 3, 5 with
        # items 7, 9, 11 (user-major order)
        x = np.array([[user, item] for user in (1, 3, 5)
                      for item in (7, 9, 11)])
        batch_reference = [
            3.64580117249, 3.6587422493, 3.66032199689,
            3.60821491793, 3.62304301551, 3.62507437787,
            3.60707987724, 3.62184516985, 3.62387542269,
        ]
        assert_allclose(rcmdr.predict(x), batch_reference, rtol=tolerance)

        # with the expectation switched off the same batch must yield
        # these whole-number scores
        rcmdr.use_expectation = False
        discrete_reference = [4., 5., 5., 4., 4., 4., 4., 4., 4.]
        assert_allclose(rcmdr.predict(x), discrete_reference, rtol=tolerance)
Esempio n. 2
0
def training(opt, ev, tsc, event_feature=None, fold=0):
    """
    Fit a recommender on the given events and record run statistics

    Parameters
    ----------
    opt : namespace-like object
        parsed command line options. It is read through attribute access
        (`domain`, `fold`, `method`, `alpha`, `k`, `tol`, `maxiter`,
        `rseed`) and tested with ``in``; timing and learning results are
        stored back on it as attributes. Presumably an
        ``argparse.Namespace`` -- confirm against the caller.
    ev : array, size=(n_events, 2), dtype=int
        array of events in external ids
    tsc : array, size=(n_events,), dtype=float
        true scores
    event_feature : optional, structured array
        structured array of event features
    fold : int, default=0
        fold No.; index into the per-fold result lists kept on `opt`

    Returns
    -------
    rcmdr : EventScorePredictor
        trained recommender
    """

    # generate event data; an all-zero `opt.domain` means the score
    # domain (min, max, smallest gap between distinct scores) is taken
    # from the true scores themselves
    data = EventWithScoreData(n_otypes=2, n_stypes=1)
    if not np.all(opt.domain == [0, 0, 0]):
        score_domain = tuple(opt.domain)
    else:
        distinct_scores = np.unique(tsc)
        score_domain = (
            np.min(tsc), np.max(tsc), np.min(np.diff(distinct_scores)))
    logger.info("score_domain = " + str(score_domain))
    data.set_events(
        ev, tsc, score_domain=score_domain, event_feature=event_feature)

    # init learning results: one slot per fold, created on first use only
    per_fold_defaults = (
        ('training_start_time', 0),
        ('training_end_time', 0),
        ('learning_i_loss', np.inf),
        ('learning_f_loss', np.inf),
        ('learning_n_iter', 0),
    )
    for attr_name, initial in per_fold_defaults:
        if attr_name not in opt:
            setattr(opt, attr_name, [initial] * opt.fold)

    # set starting time (wall clock and user CPU time of this process)
    start_time = datetime.datetime.now()
    start_utime = os.times()[0]
    start_stamp = start_time.isoformat()
    opt.training_start_time[fold] = start_stamp
    logger.info("training_start_time = " + start_stamp)

    # create and learning model; only the 'plsam' method predicts by
    # expectation, every other method has the flag switched off
    use_expectation = opt.method == 'plsam'
    rcmdr = EventScorePredictor(
        alpha=opt.alpha, k=opt.k, tol=opt.tol, maxiter=opt.maxiter,
        use_expectation=use_expectation, random_state=opt.rseed)
    rcmdr.fit(data)

    # set end and elapsed time
    end_time = datetime.datetime.now()
    end_utime = os.times()[0]
    elapsed_time = end_time - start_time
    elapsed_utime = end_utime - start_utime
    end_stamp = end_time.isoformat()
    opt.training_end_time[fold] = end_stamp
    logger.info("training_end_time = " + end_stamp)

    # elapsed times are accumulated over successive calls
    if 'training_elapsed_time' in opt:
        opt.training_elapsed_time += elapsed_time
    else:
        opt.training_elapsed_time = elapsed_time
    logger.info("training_elapsed_time = " + str(opt.training_elapsed_time))
    if 'training_elapsed_utime' in opt:
        opt.training_elapsed_utime += elapsed_utime
    else:
        opt.training_elapsed_utime = elapsed_utime
    logger.info("training_elapsed_utime = " + str(opt.training_elapsed_utime))

    # preserve optimizer's outputs
    opt.learning_i_loss[fold] = rcmdr.i_loss_
    opt.learning_f_loss[fold] = rcmdr.f_loss_
    opt.learning_n_iter[fold] = rcmdr.n_iter_

    return rcmdr
Esempio n. 3
0
    def test_class(self):
        """
        Verify EventScorePredictor on the mini MovieLens data set.

        Covers the attribute values right after construction, the losses
        and score levels after fitting, and predictions for single events
        and for a batch, with and without ``use_expectation``.
        """
        import numpy as np
        from kamrecsys.datasets import load_movielens_mini
        from kamrecsys.tm.plsa_multi import EventScorePredictor

        data = load_movielens_mini()

        rcmdr = EventScorePredictor(tol=1e-8, k=2, random_state=1234)

        # attributes expected on a predictor that has not been fitted yet
        expected_vars = {
            # constructor arguments and defaults
            "k": 2,
            "tol": 1e-8,
            "maxiter": 100,
            "alpha": 1.0,
            "random_state": 1234,
            "use_expectation": True,
            # losses and counters in their initial state
            "i_loss_": np.inf,
            "f_loss_": np.inf,
            "n_iter_": 0,
            "n_otypes": 0,
            "n_score_levels_": 0,
            "n_events_": 0,
            "n_users_": 0,
            "n_items_": 0,
            # attributes that are still None
            "_rng": None,
            "_q": None,
            "iid": None,
            "eid": None,
            "n_objects": None,
            "score_levels_": None,
            "pZ_": None,
            "pYgZ_": None,
            "pRgZ_": None,
            "pXgZ_": None,
        }
        self.assertDictEqual(vars(rcmdr), expected_vars)

        # uncomment to attach a stream handler to the 'kamrecsys' logger
        # import logging
        # logging.getLogger('kamrecsys').addHandler(logging.StreamHandler())
        rcmdr.fit(data)

        self.assertAlmostEqual(rcmdr.i_loss_, 5.41836900049, delta=1e-5)
        self.assertAlmostEqual(rcmdr.f_loss_, 5.17361298499, delta=1e-5)
        assert_allclose(rcmdr.score_levels_, [1, 2, 3, 4, 5], rtol=1e-5)

        def check_score(event, reference):
            # compare the prediction for one (user, item) event
            self.assertAlmostEqual(rcmdr.predict(event), reference, delta=1e-5)

        # known user and item
        check_score((1, 7), 3.64580117249)
        check_score((1, 9), 3.6587422493)
        check_score((5, 7), 3.60707987724)
        check_score((5, 9), 3.62184516985)

        # known user and unknown item
        check_score((1, 11), 3.66032199689)
        check_score((5, 12), 3.62387542269)

        # unknown user and known item
        check_score((3, 7), 3.60821491793)
        check_score((11, 9), 3.62304301551)

        # unknown user and item
        check_score((3, 11), 3.62507437787)

        # batch of events, one [user, item] row each
        events = [
            [1, 7], [1, 9], [1, 11],
            [3, 7], [3, 9], [3, 11],
            [5, 7], [5, 9], [5, 11],
        ]
        x = np.array(events)
        expected_scores = [
            3.64580117249, 3.6587422493, 3.66032199689,
            3.60821491793, 3.62304301551, 3.62507437787,
            3.60707987724, 3.62184516985, 3.62387542269,
        ]
        assert_allclose(rcmdr.predict(x), expected_scores, rtol=1e-5)

        # same batch with the expectation switched off
        rcmdr.use_expectation = False
        expected_levels = [4.0, 5.0, 5.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0]
        assert_allclose(rcmdr.predict(x), expected_levels, rtol=1e-5)
Esempio n. 4
0
def training(opt, ev, tsc, event_feature=None, fold=0):
    """
    Train an EventScorePredictor and log/store timing and loss statistics

    Parameters
    ----------
    opt : namespace-like object
        parsed command line options. The function reads the attributes
        `domain`, `fold`, `method`, `alpha`, `k`, `tol`, `maxiter` and
        `rseed`, checks for stored results with ``in``, and writes its
        results back as attributes. Presumably an ``argparse.Namespace``
        -- confirm against the caller.
    ev : array, size=(n_events, 2), dtype=int
        array of events in external ids
    tsc : array, size=(n_events,), dtype=float
        true scores
    event_feature : optional, structured array
        structured array of event features
    fold : int, default=0
        fold No.; selects the slot of the per-fold result lists on `opt`

    Returns
    -------
    rcmdr : EventScorePredictor
        trained recommender
    """

    # generate event data
    data = EventWithScoreData(n_otypes=2, n_stypes=1)
    derive_domain = np.all(opt.domain == [0, 0, 0])
    if derive_domain:
        # no explicit domain given: use the observed score range and the
        # smallest gap between distinct observed scores
        lowest = np.min(tsc)
        highest = np.max(tsc)
        step = np.min(np.diff(np.unique(tsc)))
        score_domain = (lowest, highest, step)
    else:
        score_domain = tuple(opt.domain)
    logger.info("score_domain = " + str(score_domain))
    data.set_events(ev,
                    tsc,
                    score_domain=score_domain,
                    event_feature=event_feature)

    # init learning results (per-fold lists, kept if already present)
    result_defaults = {
        'training_start_time': 0,
        'training_end_time': 0,
        'learning_i_loss': np.inf,
        'learning_f_loss': np.inf,
        'learning_n_iter': 0,
    }
    for result_name, placeholder in result_defaults.items():
        if result_name not in opt:
            setattr(opt, result_name, [placeholder] * opt.fold)

    # set starting time; os.times()[0] is the user CPU time
    start_time = datetime.datetime.now()
    start_utime = os.times()[0]
    opt.training_start_time[fold] = start_time.isoformat()
    logger.info("training_start_time = " + start_time.isoformat())

    # create and learning model; the expectation flag is enabled only
    # for the 'plsam' method
    expectation_flag = True if opt.method == 'plsam' else False
    model_options = dict(alpha=opt.alpha,
                         k=opt.k,
                         tol=opt.tol,
                         maxiter=opt.maxiter,
                         use_expectation=expectation_flag,
                         random_state=opt.rseed)
    rcmdr = EventScorePredictor(**model_options)
    rcmdr.fit(data)

    # set end and elapsed time
    end_time = datetime.datetime.now()
    end_utime = os.times()[0]
    opt.training_end_time[fold] = end_time.isoformat()
    logger.info("training_end_time = " + end_time.isoformat())

    # wall-clock time, summed over all calls that share this `opt`
    elapsed_time = end_time - start_time
    if 'training_elapsed_time' in opt:
        opt.training_elapsed_time += elapsed_time
    else:
        opt.training_elapsed_time = elapsed_time
    logger.info("training_elapsed_time = " + str(opt.training_elapsed_time))

    # user CPU time, summed the same way
    elapsed_utime = end_utime - start_utime
    if 'training_elapsed_utime' in opt:
        opt.training_elapsed_utime += elapsed_utime
    else:
        opt.training_elapsed_utime = elapsed_utime
    logger.info("training_elapsed_utime = " + str(opt.training_elapsed_utime))

    # preserve optimizer's outputs
    opt.learning_i_loss[fold] = rcmdr.i_loss_
    opt.learning_f_loss[fold] = rcmdr.f_loss_
    opt.learning_n_iter[fold] = rcmdr.n_iter_

    return rcmdr