def test_load_vocab_and_item_size(self):
    """The loaded vocabulary and item size match the fake-testdata fixture."""
    with _testutil.patch_download_and_extract_data(self.movielens_dir):
        loader = _dl.RecommendationDataLoader.from_movielens(
            self.generated_dir, 'test', self.test_tempdir)
        vocab, item_size = loader.load_vocab_and_item_size()
        # Both values are fixed by the generated fake MovieLens data.
        self.assertEqual(len(vocab), _testutil.VOCAB_SIZE)
        self.assertEqual(item_size, _testutil.ITEM_SIZE)
def test_from_movielens(self):
    """Loaders built from the 'train' and 'test' splits report expected sizes."""
    with _testutil.patch_download_and_extract_data(self.movielens_dir):
        # Exercise both splits with identical checks.
        for split_name, expected_len in (('train', _testutil.TRAIN_SIZE),
                                         ('test', _testutil.TEST_SIZE)):
            loader = _dl.RecommendationDataLoader.from_movielens(
                self.generated_dir, split_name, self.test_tempdir)
            self.assertEqual(len(loader), expected_len)
            self.assertIsNotNone(loader._dataset)
def test_split(self):
    """split(0.1) partitions the loader into two complementary loaders."""
    with _testutil.patch_download_and_extract_data(self.movielens_dir):
        loader = _dl.RecommendationDataLoader.from_movielens(
            self.generated_dir, 'test', self.test_tempdir)
        first, second = loader.split(0.1)
        # The first part gets the (truncated) 10% share; the second the rest.
        first_size = int(0.1 * _testutil.TEST_SIZE)
        self.assertEqual(len(first), first_size)
        self.assertIsNotNone(first._dataset)
        self.assertEqual(len(second), _testutil.TEST_SIZE - first_size)
        self.assertIsNotNone(second._dataset)
def test_recommendation_demo(self):
    """End-to-end demo run exports a non-empty TFLite model file."""
    with _rt.patch_download_and_extract_data(self.dataset_dir):
        data_dir = recommendation_demo.download_data(self.download_dir)
        self.assertEqual(data_dir, self.dataset_dir)

        export_dir = os.path.join(self.test_tempdir, 'export')
        tflite_path = os.path.join(export_dir, 'model.tflite')
        # One epoch keeps the demo fast while still producing an export.
        with patch_data_loader():
            recommendation_demo.run(data_dir, export_dir, epochs=1)
        self.assertTrue(tf.io.gfile.exists(tflite_path))
        self.assertGreater(os.path.getsize(tflite_path), 0)
def setUp(self):
    """Creates fake MovieLens loaders and shared model-spec options."""
    super().setUp()
    _testutil.setup_fake_testdata(self)
    with _testutil.patch_download_and_extract_data(self.movielens_dir):
        self.train_loader = _dl.RecommendationDataLoader.from_movielens(
            self.generated_dir, 'train', self.test_tempdir)
        self.test_loader = _dl.RecommendationDataLoader.from_movielens(
            self.generated_dir, 'test', self.test_tempdir)
    # Options shared by the model-spec construction in the tests.
    self.model_spec_options = {
        'context_embedding_dim': 16,
        'label_embedding_dim': 16,
        'item_vocab_size': self.test_loader.max_vocab_id,
        'hidden_layer_dim_ratios': [1, 1],
    }
def test_from_movielens(self):
    """Both splits expose the expected size, vocab type/size and max vocab id."""
    with _testutil.patch_download_and_extract_data(self.movielens_dir):
        # Run identical structural checks over both splits.
        for split_name, expected_len in (('train', _testutil.TRAIN_SIZE),
                                         ('test', _testutil.TEST_SIZE)):
            loader = _dl.RecommendationDataLoader.from_movielens(
                self.generated_dir, split_name, self.test_tempdir)
            self.assertEqual(len(loader), expected_len)
            self.assertIsNotNone(loader._dataset)
            self.assertIsInstance(loader.vocab, collections.OrderedDict)
            self.assertEqual(len(loader.vocab), _testutil.VOCAB_SIZE)
            self.assertEqual(loader.max_vocab_id, _testutil.MAX_ITEM_ID)
def test_prepare_movielens_datasets(self):
    """_prepare_movielens_datasets writes dataset files and reports stats.

    Verifies that the returned stats dict contains the generated file paths
    and sizes, and that the extracted MovieLens dir and meta file exist.
    """
    loader = _dl.RecommendationDataLoader
    with _testutil.patch_download_and_extract_data(self.movielens_dir):
        stats = loader._prepare_movielens_datasets(
            self.test_tempdir, self.generated_dir, 'train.tfrecord',
            'test.tfrecord', 'movie_vocab.json', 'meta.json')
        expected_subset = {
            'train_file': os.path.join(self.generated_dir, 'train.tfrecord'),
            'test_file': os.path.join(self.generated_dir, 'test.tfrecord'),
            'vocab_file': os.path.join(self.generated_dir, 'movie_vocab.json'),
            'train_size': _testutil.TRAIN_SIZE,
            'test_size': _testutil.TEST_SIZE,
            'vocab_size': _testutil.VOCAB_SIZE,
        }
        # assertDictContainsSubset is deprecated since Python 3.2 and removed
        # in 3.12; dict items views support subset comparison directly.
        self.assertGreaterEqual(stats.items(), expected_subset.items())
        self.assertTrue(os.path.exists(self.movielens_dir))
        self.assertGreater(len(os.listdir(self.movielens_dir)), 0)
        meta_file = os.path.join(self.generated_dir, 'meta.json')
        self.assertTrue(os.path.exists(meta_file))
def test_gen_dataset(self):
    """gen_dataset yields a tf.data.Dataset for evaluation batches."""
    with _testutil.patch_download_and_extract_data(self.movielens_dir):
        loader = _dl.RecommendationDataLoader.from_movielens(
            self.generated_dir, 'test', self.test_tempdir)
        dataset = loader.gen_dataset(10, is_training=False)
        self.assertIsInstance(dataset, tf.data.Dataset)
def test_download_and_extract_data(self):
    """download_and_extract_movielens delegates to the patched helper.

    Checks both that the helper mock was invoked with the download dir and
    that the returned path is the extracted dataset dir.
    """
    with _testutil.patch_download_and_extract_data(self.dataset_dir) as fn:
        out_dir = _dl.RecommendationDataLoader.download_and_extract_movielens(
            self.download_dir)
        # Bug fix: `fn.called_once_with(...)` is not a Mock assertion -- it
        # merely auto-creates a child mock and always "passes" silently.
        # The real assertion method is `assert_called_once_with`.
        fn.assert_called_once_with(self.download_dir)
        self.assertEqual(out_dir, self.dataset_dir)