def test_ii_implicit_save_load(tmp_path):
    """
    Round-trip an implicit-feedback item-item model through pickle.

    The model is fit on the ``user`` and ``item`` columns only (no ratings),
    pickled to a file under ``tmp_path``, read back, and the reloaded model
    is compared against the one that was fit.
    """
    original = knn.ItemItem(30, save_nbrs=500, center=False, aggregate='sum')
    _log.info('building model')
    interactions = lktu.ml_sample().loc[:, ['user', 'item']]
    original.fit(interactions)

    model_file = tmp_path / 'ii.mod'
    _log.info('saving model to %s', model_file)
    with model_file.open('wb') as out_fh:
        pickle.dump(original, out_fh)

    _log.info('reloading model')
    # The file was written by this test a moment ago, so unpickling it is safe.
    with model_file.open('rb') as in_fh:
        algo = pickle.load(in_fh)

    _log.info('checking model')
    loaded_sim = algo.sim_matrix_
    orig_sim = original.sim_matrix_
    sim_values = loaded_sim.values

    # similarities: no NaNs, strictly positive, at most 1 (plus a little tolerance)
    assert not any(np.isnan(sim_values))
    assert all(sim_values > 0)
    assert all(sim_values < 1 + 1.0e-6)

    # the reloaded structure must agree with the model that was fit
    assert all(algo.item_counts_ == original.item_counts_)
    assert algo.item_counts_.sum() == loaded_sim.nnz
    assert loaded_sim.nnz == orig_sim.nnz
    assert all(loaded_sim.rowptrs == orig_sim.rowptrs)
    assert sim_values == approx(orig_sim.values)
    assert algo.rating_matrix_.values is None

    # row layout must agree before comparing the rows slice by slice
    assert all(loaded_sim.rowptrs == orig_sim.rowptrs)

    for row in range(len(algo.item_index_)):
        row_slice = slice(loaded_sim.rowptrs[row], loaded_sim.rowptrs[row + 1])
        row_values = loaded_sim.values[row_slice]

        # every row is stored in decreasing order
        assert all(np.diff(row_values) <= 0)
        assert all(row_values == orig_sim.values[row_slice])

    assert algo.item_means_ is None

    sp_matrix = loaded_sim.to_scipy()

    candidates = pd.Series(algo.item_index_)
    candidates = candidates[algo.item_counts_ > 0]
    for item in candidates.sample(50):
        pos = algo.item_index_.get_loc(item)
        _log.debug('checking item %d at position %d', item, pos)

        row_data = sp_matrix.getrow(pos).data

        # A sorted row never increases, so successive differences must stay
        # below a small positive tolerance.
        assert all(np.diff(row_data) < 1.0e-6)
def test_ii_save_load(tmp_path):
    """
    Round-trip an item-item model through its own ``save`` / ``load`` methods.

    The model is fit on the sample ratings, saved to a file under
    ``tmp_path``, loaded into a fresh ``ItemItem`` instance, and the loaded
    model is compared against the one that was fit.
    """
    tmp_path = lktu.norm_path(tmp_path)

    original = knn.ItemItem(30, save_nbrs=500)
    _log.info('building model')
    ratings = lktu.ml_sample()
    original.fit(ratings)

    model_file = tmp_path / 'ii.mod'
    _log.info('saving model to %s', model_file)
    original.save(model_file)

    _log.info('reloading model')
    algo = knn.ItemItem(30)
    algo.load(model_file)

    _log.info('checking model')
    loaded_sim = algo.sim_matrix_
    orig_sim = original.sim_matrix_
    sim_values = loaded_sim.values

    # similarities: no NaNs, strictly positive, at most 1 (plus a little tolerance)
    assert not any(np.isnan(sim_values))
    assert all(sim_values > 0)
    assert all(sim_values < 1 + 1.0e-6)

    # the reloaded structure must agree with the model that was fit
    assert all(algo.item_counts_ == original.item_counts_)
    assert algo.item_counts_.sum() == loaded_sim.nnz
    assert loaded_sim.nnz == orig_sim.nnz
    assert all(loaded_sim.rowptrs == orig_sim.rowptrs)
    assert sim_values == approx(orig_sim.values)

    # row layout must agree before comparing the rows slice by slice
    assert all(loaded_sim.rowptrs == orig_sim.rowptrs)

    for row in range(len(algo.item_index_)):
        row_slice = slice(loaded_sim.rowptrs[row], loaded_sim.rowptrs[row + 1])
        row_values = loaded_sim.values[row_slice]

        # every row is stored in decreasing order
        assert all(np.diff(row_values) <= 0)
        assert all(row_values == orig_sim.values[row_slice])

    # NOTE(review): this compares the per-item rating means with the means of
    # the model that was fit, not with the reloaded model's ``item_means_`` --
    # confirm whether the reloaded model should be checked here too.
    expected_means = ml_ratings.groupby('item').rating.mean()
    assert expected_means[algo.item_index_].values == approx(original.item_means_)

    sp_matrix = lm.csr_to_scipy(loaded_sim)

    candidates = pd.Series(algo.item_index_)
    candidates = candidates[algo.item_counts_ > 0]
    for item in candidates.sample(50):
        pos = algo.item_index_.get_loc(item)
        _log.debug('checking item %d at position %d', item, pos)

        row_data = sp_matrix.getrow(pos).data

        # A sorted row never increases, so successive differences must stay
        # below a small positive tolerance.
        assert all(np.diff(row_data) < 1.0e-6)