Example #1
  def test_load_vocab_and_item_size(self):
    with _testutil.patch_download_and_extract_data(self.movielens_dir):
      test_loader = _dl.RecommendationDataLoader.from_movielens(
          self.generated_dir, 'test', self.test_tempdir)
    vocab, item_size = test_loader.load_vocab_and_item_size()
    self.assertEqual(len(vocab), _testutil.VOCAB_SIZE)
    self.assertEqual(item_size, _testutil.ITEM_SIZE)
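
The vocab/item_size pair loaded above could be used to size an item embedding table. A minimal sketch, assuming a test_loader built as in Example #1 (the extra padding row and the embedding width of 16 are illustrative assumptions, not part of the test):

import tensorflow as tf

vocab, item_size = test_loader.load_vocab_and_item_size()
# Reserve one extra row for a padding / out-of-vocabulary id (assumption).
item_embedding = tf.keras.layers.Embedding(input_dim=item_size + 1, output_dim=16)
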
Example #2
  def test_from_movielens(self):
    with _testutil.patch_download_and_extract_data(self.movielens_dir):
      train_loader = _dl.RecommendationDataLoader.from_movielens(
          self.generated_dir, 'train', self.test_tempdir)
      test_loader = _dl.RecommendationDataLoader.from_movielens(
          self.generated_dir, 'test', self.test_tempdir)

    self.assertEqual(len(train_loader), _testutil.TRAIN_SIZE)
    self.assertIsNotNone(train_loader._dataset)

    self.assertEqual(len(test_loader), _testutil.TEST_SIZE)
    self.assertIsNotNone(test_loader._dataset)
Example #3
  def test_split(self):
    with _testutil.patch_download_and_extract_data(self.movielens_dir):
      test_loader = _dl.RecommendationDataLoader.from_movielens(
          self.generated_dir, 'test', self.test_tempdir)
    test0, test1 = test_loader.split(0.1)
    expected_size0 = int(0.1 * _testutil.TEST_SIZE)
    expected_size1 = _testutil.TEST_SIZE - expected_size0
    self.assertEqual(len(test0), expected_size0)
    self.assertIsNotNone(test0._dataset)

    self.assertEqual(len(test1), expected_size1)
    self.assertIsNotNone(test1._dataset)
Example #4
    def test_recommendation_demo(self):
        with _rt.patch_download_and_extract_data(self.dataset_dir):
            data_dir = recommendation_demo.download_data(self.download_dir)
        self.assertEqual(data_dir, self.dataset_dir)

        export_dir = os.path.join(self.test_tempdir, 'export')
        tflite_filename = os.path.join(export_dir, 'model.tflite')
        with patch_data_loader():
            recommendation_demo.run(data_dir, export_dir, epochs=1)

        self.assertTrue(tf.io.gfile.exists(tflite_filename))
        self.assertGreater(os.path.getsize(tflite_filename), 0)
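
Outside the test harness, the same demo entry points exercised in Example #4 could be driven directly. A rough sketch, assuming the recommendation_demo import path shown below and placeholder directories (the epoch count is arbitrary):

import os

from tensorflow_examples.lite.model_maker.demo import recommendation_demo  # assumed import path

download_dir = '/tmp/recommendation/download'  # placeholder
export_dir = '/tmp/recommendation/export'      # placeholder

# Download/prepare the data, then train and export a TFLite model.
data_dir = recommendation_demo.download_data(download_dir)
recommendation_demo.run(data_dir, export_dir, epochs=5)

# The exported model is expected next to the other export artifacts.
print(os.path.join(export_dir, 'model.tflite'))
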
Example #5
    def setUp(self):
        super().setUp()
        _testutil.setup_fake_testdata(self)
        with _testutil.patch_download_and_extract_data(self.movielens_dir):
            self.train_loader = _dl.RecommendationDataLoader.from_movielens(
                self.generated_dir, 'train', self.test_tempdir)
            self.test_loader = _dl.RecommendationDataLoader.from_movielens(
                self.generated_dir, 'test', self.test_tempdir)

        self.model_spec_options = dict(
            context_embedding_dim=16,
            label_embedding_dim=16,
            item_vocab_size=self.test_loader.max_vocab_id,
            hidden_layer_dim_ratios=[1, 1],
        )
Example #6
  def test_from_movielens(self):
    with _testutil.patch_download_and_extract_data(self.movielens_dir):
      train_loader = _dl.RecommendationDataLoader.from_movielens(
          self.generated_dir, 'train', self.test_tempdir)
      test_loader = _dl.RecommendationDataLoader.from_movielens(
          self.generated_dir, 'test', self.test_tempdir)

    self.assertEqual(len(train_loader), _testutil.TRAIN_SIZE)
    self.assertIsNotNone(train_loader._dataset)
    self.assertIsInstance(train_loader.vocab, collections.OrderedDict)
    self.assertEqual(len(train_loader.vocab), _testutil.VOCAB_SIZE)
    self.assertEqual(train_loader.max_vocab_id, _testutil.MAX_ITEM_ID)

    self.assertEqual(len(test_loader), _testutil.TEST_SIZE)
    self.assertIsNotNone(test_loader._dataset)
    self.assertEqual(len(test_loader.vocab), _testutil.VOCAB_SIZE)
    self.assertIsInstance(test_loader.vocab, collections.OrderedDict)
    self.assertEqual(test_loader.max_vocab_id, _testutil.MAX_ITEM_ID)
Example #7
  def test_prepare_movielens_datasets(self):
    loader = _dl.RecommendationDataLoader
    with _testutil.patch_download_and_extract_data(self.movielens_dir):
      stats = loader._prepare_movielens_datasets(
          self.test_tempdir, self.generated_dir, 'train.tfrecord',
          'test.tfrecord', 'movie_vocab.json', 'meta.json')
    self.assertDictContainsSubset(
        {
            'train_file': os.path.join(self.generated_dir, 'train.tfrecord'),
            'test_file': os.path.join(self.generated_dir, 'test.tfrecord'),
            'vocab_file': os.path.join(self.generated_dir, 'movie_vocab.json'),
            'train_size': _testutil.TRAIN_SIZE,
            'test_size': _testutil.TEST_SIZE,
            'vocab_size': _testutil.VOCAB_SIZE,
        }, stats)

    self.assertTrue(os.path.exists(self.movielens_dir))
    self.assertGreater(len(os.listdir(self.movielens_dir)), 0)

    meta_file = os.path.join(self.generated_dir, 'meta.json')
    self.assertTrue(os.path.exists(meta_file))
Example #8
  def test_gen_dataset(self):
    with _testutil.patch_download_and_extract_data(self.movielens_dir):
      test_loader = _dl.RecommendationDataLoader.from_movielens(
          self.generated_dir, 'test', self.test_tempdir)
    ds = test_loader.gen_dataset(10, is_training=False)
    self.assertIsInstance(ds, tf.data.Dataset)

  def test_download_and_extract_data(self):
    with _testutil.patch_download_and_extract_data(self.dataset_dir) as fn:
      out_dir = _dl.RecommendationDataLoader.download_and_extract_movielens(
          self.download_dir)
      fn.assert_called_once_with(self.download_dir)
      self.assertEqual(out_dir, self.dataset_dir)
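
Taken together, the examples above cover the loader's public surface. A combined usage sketch, mirroring only the calls shown in the tests (the module import path and the directories are assumptions):

from tensorflow_examples.lite.model_maker.core.data_util import recommendation_dataloader as _dl  # assumed import path

download_dir = '/tmp/recommendation/download'    # placeholder
generated_dir = '/tmp/recommendation/generated'  # placeholder

# Fetch and extract the MovieLens archive (Example #8).
_dl.RecommendationDataLoader.download_and_extract_movielens(download_dir)

# Build train/test loaders from the generated records (Examples #2 and #6).
train_loader = _dl.RecommendationDataLoader.from_movielens(
    generated_dir, 'train', download_dir)
test_loader = _dl.RecommendationDataLoader.from_movielens(
    generated_dir, 'test', download_dir)

# Inspect the vocabulary (Example #1) and carve off a validation split (Example #3).
vocab, item_size = train_loader.load_vocab_and_item_size()
validation_loader, test_loader = test_loader.split(0.1)

# Materialize a batched tf.data.Dataset for evaluation (Example #8).
eval_ds = test_loader.gen_dataset(10, is_training=False)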