def test_class(self):
    """Check EventScorePredictor defaults, fitted losses and predictions
    on the mini MovieLens data."""
    import numpy as np
    from kamrecsys.datasets import load_movielens_mini
    from kamrecsys.tm.plsa_multi import EventScorePredictor

    data = load_movielens_mini()
    rcmdr = EventScorePredictor(tol=1e-8, k=2, random_state=1234)

    # state of a freshly constructed, unfitted predictor
    initial_state = {
        'k': 2,
        'tol': 1e-8,
        'maxiter': 100,
        'alpha': 1.0,
        'random_state': 1234,
        '_rng': None,
        'iid': None,
        'eid': None,
        'n_objects': None,
        'n_otypes': 0,
        'n_score_levels_': 0,
        'i_loss_': np.inf,
        'f_loss_': np.inf,
        'n_iter_': 0,
        'pZ_': None,
        'pYgZ_': None,
        'pRgZ_': None,
        'pXgZ_': None,
        'n_events_': 0,
        'n_users_': 0,
        'n_items_': 0,
        '_q': None,
        'score_levels_': None,
        'use_expectation': True,
    }
    self.assertDictEqual(vars(rcmdr), initial_state)

    # To trace the fitting process, attach a logging.StreamHandler to the
    # 'kamrecsys' logger before calling fit().
    rcmdr.fit(data)

    self.assertAlmostEqual(rcmdr.i_loss_, 5.41836900049, delta=1e-5)
    self.assertAlmostEqual(rcmdr.f_loss_, 5.17361298499, delta=1e-5)
    assert_allclose(rcmdr.score_levels_, [1, 2, 3, 4, 5], rtol=1e-5)

    # single (user, item) queries paired with the expected score
    single_cases = (
        # known user and item
        ((1, 7), 3.64580117249),
        ((1, 9), 3.6587422493),
        ((5, 7), 3.60707987724),
        ((5, 9), 3.62184516985),
        # known user and unknown item
        ((1, 11), 3.66032199689),
        ((5, 12), 3.62387542269),
        # unknown user and known item
        ((3, 7), 3.60821491793),
        ((11, 9), 3.62304301551),
        # unknown user and item
        ((3, 11), 3.62507437787),
    )
    for event, expected in single_cases:
        self.assertAlmostEqual(rcmdr.predict(event), expected, delta=1e-5)

    # batch query: every combination of users 1, 3, 5 with items 7, 9, 11
    x = np.array([[user, item] for user in (1, 3, 5) for item in (7, 9, 11)])
    expected_batch = [
        3.64580117249, 3.6587422493, 3.66032199689,
        3.60821491793, 3.62304301551, 3.62507437787,
        3.60707987724, 3.62184516985, 3.62387542269,
    ]
    assert_allclose(rcmdr.predict(x), expected_batch, rtol=1e-5)

    # with use_expectation switched off the same batch yields these scores
    rcmdr.use_expectation = False
    assert_allclose(
        rcmdr.predict(x),
        [4., 5., 5., 4., 4., 4., 4., 4., 4.],
        rtol=1e-5)
def training(opt, ev, tsc, event_feature=None, fold=0):
    """
    Fit a recommender for one fold and record timing and loss results

    Parameters
    ----------
    opt : object
        parsed command line options; read by attribute and tested with
        ``in`` (presumably an argparse namespace -- verify against caller).
        Per-fold results and accumulated elapsed times are written to it.
    ev : array, size=(n_events, 2)
        array of events in external ids
    tsc : array, size=(n_events,)
        true scores
    event_feature : optional, structured array
        structured array of event features
    fold : int, default=0
        fold No.; index of the slot updated in the per-fold result lists

    Returns
    -------
    rcmdr : EventScorePredictor
        trained recommender
    """

    # generate event data
    data = EventWithScoreData(n_otypes=2, n_stypes=1)
    if np.all(opt.domain == [0, 0, 0]):
        # all-zero domain: derive (min, max, smallest gap between distinct
        # scores) from the observed scores
        distinct_scores = np.unique(tsc)
        score_domain = (
            np.min(tsc), np.max(tsc), np.min(np.diff(distinct_scores)))
    else:
        score_domain = tuple(opt.domain)
    logger.info("score_domain = " + str(score_domain))
    data.set_events(
        ev, tsc, score_domain=score_domain, event_feature=event_feature)

    # create the per-fold result lists on first use
    result_slots = (
        ('training_start_time', 0),
        ('training_end_time', 0),
        ('learning_i_loss', np.inf),
        ('learning_f_loss', np.inf),
        ('learning_n_iter', 0),
    )
    for slot_name, placeholder in result_slots:
        if slot_name not in opt:
            setattr(opt, slot_name, [placeholder] * opt.fold)

    # record wall-clock time and os.times()[0] before fitting
    began = datetime.datetime.now()
    began_utime = os.times()[0]
    began_iso = began.isoformat()
    opt.training_start_time[fold] = began_iso
    logger.info("training_start_time = " + began_iso)

    # the 'plsam' method predicts with expectation enabled; any other
    # method disables it
    with_expectation = opt.method == 'plsam'
    rcmdr = EventScorePredictor(
        alpha=opt.alpha, k=opt.k, tol=opt.tol, maxiter=opt.maxiter,
        use_expectation=with_expectation, random_state=opt.rseed)
    rcmdr.fit(data)

    # record end time and compute elapsed amounts
    finished = datetime.datetime.now()
    finished_utime = os.times()[0]
    wall_spent = finished - began
    utime_spent = finished_utime - began_utime
    finished_iso = finished.isoformat()
    opt.training_end_time[fold] = finished_iso
    logger.info("training_end_time = " + finished_iso)

    # elapsed times accumulate over successive calls
    if 'training_elapsed_time' in opt:
        opt.training_elapsed_time += wall_spent
    else:
        opt.training_elapsed_time = wall_spent
    logger.info("training_elapsed_time = " + str(opt.training_elapsed_time))

    if 'training_elapsed_utime' in opt:
        opt.training_elapsed_utime += utime_spent
    else:
        opt.training_elapsed_utime = utime_spent
    logger.info("training_elapsed_utime = " + str(opt.training_elapsed_utime))

    # preserve optimizer's outputs
    opt.learning_i_loss[fold] = rcmdr.i_loss_
    opt.learning_f_loss[fold] = rcmdr.f_loss_
    opt.learning_n_iter[fold] = rcmdr.n_iter_

    return rcmdr
def test_class(self):
    """Check EventScorePredictor defaults, fitted losses and predictions
    on the mini MovieLens data."""
    import numpy as np
    from kamrecsys.datasets import load_movielens_mini
    from kamrecsys.tm.plsa_multi import EventScorePredictor

    data = load_movielens_mini()
    rcmdr = EventScorePredictor(tol=1e-8, k=2, random_state=1234)

    # state of a freshly constructed, unfitted predictor
    initial_state = {
        "k": 2,
        "tol": 1e-8,
        "maxiter": 100,
        "alpha": 1.0,
        "random_state": 1234,
        "_rng": None,
        "iid": None,
        "eid": None,
        "n_objects": None,
        "n_otypes": 0,
        "n_score_levels_": 0,
        "i_loss_": np.inf,
        "f_loss_": np.inf,
        "n_iter_": 0,
        "pZ_": None,
        "pYgZ_": None,
        "pRgZ_": None,
        "pXgZ_": None,
        "n_events_": 0,
        "n_users_": 0,
        "n_items_": 0,
        "_q": None,
        "score_levels_": None,
        "use_expectation": True,
    }
    self.assertDictEqual(vars(rcmdr), initial_state)

    # To trace the fitting process, attach a logging.StreamHandler to the
    # 'kamrecsys' logger before calling fit().
    rcmdr.fit(data)

    self.assertAlmostEqual(rcmdr.i_loss_, 5.41836900049, delta=1e-5)
    self.assertAlmostEqual(rcmdr.f_loss_, 5.17361298499, delta=1e-5)
    assert_allclose(rcmdr.score_levels_, [1, 2, 3, 4, 5], rtol=1e-5)

    # single (user, item) queries paired with the expected score
    single_cases = (
        # known user and item
        ((1, 7), 3.64580117249),
        ((1, 9), 3.6587422493),
        ((5, 7), 3.60707987724),
        ((5, 9), 3.62184516985),
        # known user and unknown item
        ((1, 11), 3.66032199689),
        ((5, 12), 3.62387542269),
        # unknown user and known item
        ((3, 7), 3.60821491793),
        ((11, 9), 3.62304301551),
        # unknown user and item
        ((3, 11), 3.62507437787),
    )
    for event, expected in single_cases:
        self.assertAlmostEqual(rcmdr.predict(event), expected, delta=1e-5)

    # batch query: every combination of users 1, 3, 5 with items 7, 9, 11
    x = np.array([[user, item] for user in (1, 3, 5) for item in (7, 9, 11)])
    expected_batch = [
        3.64580117249, 3.6587422493, 3.66032199689,
        3.60821491793, 3.62304301551, 3.62507437787,
        3.60707987724, 3.62184516985, 3.62387542269,
    ]
    assert_allclose(rcmdr.predict(x), expected_batch, rtol=1e-5)

    # with use_expectation switched off the same batch yields these scores
    rcmdr.use_expectation = False
    assert_allclose(
        rcmdr.predict(x),
        [4.0, 5.0, 5.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0],
        rtol=1e-5,
    )
def training(opt, ev, tsc, event_feature=None, fold=0):
    """
    Fit a recommender for one fold and record timing and loss results

    Parameters
    ----------
    opt : object
        parsed command line options; read by attribute and tested with
        ``in`` (presumably an argparse namespace -- verify against caller).
        Per-fold results and accumulated elapsed times are written to it.
    ev : array, size=(n_events, 2)
        array of events in external ids
    tsc : array, size=(n_events,)
        true scores
    event_feature : optional, structured array
        structured array of event features
    fold : int, default=0
        fold No.; index of the slot updated in the per-fold result lists

    Returns
    -------
    rcmdr : EventScorePredictor
        trained recommender
    """

    # generate event data
    data = EventWithScoreData(n_otypes=2, n_stypes=1)
    if np.all(opt.domain == [0, 0, 0]):
        # all-zero domain: derive (min, max, smallest gap between distinct
        # scores) from the observed scores
        distinct_scores = np.unique(tsc)
        score_domain = (
            np.min(tsc), np.max(tsc), np.min(np.diff(distinct_scores)))
    else:
        score_domain = tuple(opt.domain)
    logger.info("score_domain = " + str(score_domain))
    data.set_events(
        ev, tsc, score_domain=score_domain, event_feature=event_feature)

    # create the per-fold result lists on first use
    result_slots = (
        ('training_start_time', 0),
        ('training_end_time', 0),
        ('learning_i_loss', np.inf),
        ('learning_f_loss', np.inf),
        ('learning_n_iter', 0),
    )
    for slot_name, placeholder in result_slots:
        if slot_name not in opt:
            setattr(opt, slot_name, [placeholder] * opt.fold)

    # record wall-clock time and os.times()[0] before fitting
    began = datetime.datetime.now()
    began_utime = os.times()[0]
    began_iso = began.isoformat()
    opt.training_start_time[fold] = began_iso
    logger.info("training_start_time = " + began_iso)

    # the 'plsam' method predicts with expectation enabled; any other
    # method disables it
    with_expectation = opt.method == 'plsam'
    rcmdr = EventScorePredictor(
        alpha=opt.alpha, k=opt.k, tol=opt.tol, maxiter=opt.maxiter,
        use_expectation=with_expectation, random_state=opt.rseed)
    rcmdr.fit(data)

    # record end time and compute elapsed amounts
    finished = datetime.datetime.now()
    finished_utime = os.times()[0]
    wall_spent = finished - began
    utime_spent = finished_utime - began_utime
    finished_iso = finished.isoformat()
    opt.training_end_time[fold] = finished_iso
    logger.info("training_end_time = " + finished_iso)

    # elapsed times accumulate over successive calls
    if 'training_elapsed_time' in opt:
        opt.training_elapsed_time += wall_spent
    else:
        opt.training_elapsed_time = wall_spent
    logger.info("training_elapsed_time = " + str(opt.training_elapsed_time))

    if 'training_elapsed_utime' in opt:
        opt.training_elapsed_utime += utime_spent
    else:
        opt.training_elapsed_utime = utime_spent
    logger.info("training_elapsed_utime = " + str(opt.training_elapsed_utime))

    # preserve optimizer's outputs
    opt.learning_i_loss[fold] = rcmdr.i_loss_
    opt.learning_f_loss[fold] = rcmdr.f_loss_
    opt.learning_n_iter[fold] = rcmdr.n_iter_

    return rcmdr