def test_load_yaml(self):
    """Check that a pipeline loaded from YAML refuses to encode until trained.

    The same YAML file is loaded twice; the second, fresh instance is
    expected to raise on ``encode`` again.
    """
    yaml_file = os.path.join(self.dirname, 'yaml', 'pipeline-multi-encoder.yml')

    pipeline = BaseEncoder.load_yaml(yaml_file)
    # Before train() is called, encode() is expected to raise.
    with self.assertRaises(RuntimeError):
        pipeline.encode(1)
    pipeline.train(1)
    self.assertEqual(5, pipeline.encode(1))

    # A fresh load from the same YAML must raise on encode() once more.
    pipeline = BaseEncoder.load_yaml(yaml_file)
    with self.assertRaises(RuntimeError):
        pipeline.encode(1)
def test_dump_load(self):
    """Dump the YAML-built encoder, load it back and check the encoded shape."""
    self.encoder = BaseEncoder.load_yaml(self.yaml_path)
    self.encoder.dump(self.dump_path)

    restored = BaseEncoder.load(self.dump_path)
    encoded = restored.encode(self.test_frames)

    expected_rows = 3
    expected_cols = 19310
    self.assertEqual(encoded.shape[0], expected_rows)
    self.assertEqual(encoded.shape[1], expected_cols)
def test_base(self):
    """Check that ``is_trained`` set before ``dump()`` is seen after reloading.

    The encoder is built from ``self.yaml_path``, flagged as trained, dumped,
    and then built from the same YAML again; the second instance is expected
    to report ``is_trained``.
    """
    a = BaseEncoder.load_yaml(self.yaml_path)
    self.assertFalse(a.is_trained)

    # simulate training
    a.is_trained = True
    a.dump()
    # Actually assert that the dump file exists; a bare os.path.exists()
    # call discards its result and verifies nothing.
    # NOTE(review): assumes dump() without arguments writes to
    # self.dump_path -- confirm against the YAML's work_dir/name.
    self.assertTrue(os.path.exists(self.dump_path))

    # load the dump from yaml
    b = BaseEncoder.load_yaml(self.yaml_path)
    self.assertTrue(b.is_trained)
def test_dump_load(self):
    """Dump and reload the encoder, then check the shape for every image batch."""
    self.encoder = BaseEncoder.load_yaml(self.yaml_path)
    self.encoder.dump(self.dump_path)
    reloaded = BaseEncoder.load(self.dump_path)

    expected_rows = 2
    expected_dim = 2048
    for batch in self.test_img:
        encoded = reloaded.encode(batch)
        self.assertEqual(encoded.shape[0], expected_rows)
        self.assertEqual(encoded.shape[1], expected_dim)
def test_encoding(self):
    """Train, encode, dump and reload; the encoded shape must stay (1000, 300)."""
    expected_shape = (1000, 300)
    self.encoder = BaseEncoder.load_yaml(self.yaml_path)

    # Train first so that pca_components exist before encode() is called.
    self.encoder.train(self.test_numeric)
    encoded = self.encoder.encode(self.test_numeric)
    self.assertEqual(encoded.shape, expected_shape)

    # Dump only after training, so the dump carries valid pca_components.
    self.encoder.dump(self.dump_path)
    reloaded = BaseEncoder.load(self.dump_path)
    encoded = reloaded.encode(self.test_numeric)
    self.assertEqual(encoded.shape, expected_shape)
def test_load_contrib(self):
    """Load a contrib encoder via GNES_CONTRIB_MODULE and round-trip it through YAML.

    The encoder is loaded from ``self.yaml_path``, dumped (binary and YAML),
    and loaded again from the dumped YAML; both instances must be
    ``BaseTextEncoder`` objects returning ``'hello 531'`` from ``encode``.
    """
    contrib_spec = '%s:%s' % (cls_name, module_path)
    # NOTE(review): presumably the variable has to be set before the import
    # below picks up contrib modules -- keep this ordering.
    os.environ['GNES_CONTRIB_MODULE'] = contrib_spec
    from gnes.encoder.base import BaseEncoder, BaseTextEncoder

    original = BaseEncoder.load_yaml(self.yaml_path)
    self.assertIsInstance(original, BaseTextEncoder)
    self.assertEqual(original.encode([]), 'hello 531')

    original.dump()
    original.dump_yaml(self.dump_yaml_path)

    restored = BaseEncoder.load_yaml(self.dump_yaml_path)
    self.assertIsInstance(restored, BaseTextEncoder)
    self.assertEqual(restored.encode([]), 'hello 531')
def test_dump_load(self):
    """Dump the encoder built from ``self.vgg_yaml``, reload it and check output shapes."""
    self.encoder = BaseEncoder.load_yaml(self.vgg_yaml)
    self.encoder.dump(self.dump_path)
    reloaded = BaseEncoder.load(self.dump_path)

    feature_dim = 4096
    for batch in self.test_img:
        encoded = reloaded.encode(batch)
        # One output row per input image in the batch.
        self.assertEqual(encoded.shape[0], len(batch))
        self.assertEqual(encoded.shape[1], feature_dim)
def test_base(self):
    """Check that marking every component trained makes the reloaded encoder trained.

    The encoder is built from ``self.yaml_path``, each of its components is
    flagged as trained, the encoder is dumped, and a second instance built
    from the same YAML is expected to report ``is_trained``.
    """
    a = BaseEncoder.load_yaml(self.yaml_path)
    self.assertFalse(a.is_trained)

    # Simulate training by flagging each component individually.
    for c in a.components:
        c.is_trained = True
    a.dump()
    # Actually assert that the dump file exists; a bare os.path.exists()
    # call discards its result and verifies nothing.
    # NOTE(review): assumes dump() without arguments writes to
    # self.dump_path -- confirm against the YAML's work_dir/name.
    self.assertTrue(os.path.exists(self.dump_path))

    # load the dump from yaml
    b = BaseEncoder.load_yaml(self.yaml_path)
    self.assertTrue(b.is_trained)
def test_name_warning(self):
    """Dump a named pipeline of two empty-named encoders and reload it from YAML.

    Nothing is asserted; the printed marker is there so that any warning
    emitted by the final ``load_yaml`` is easy to spot in the output.
    """
    first = DummyTFEncoder()
    second = DummyTFEncoder()
    for member in (first, second):
        member.name = ''

    pipeline = PipelineEncoder()
    pipeline.components = lambda: [first, second]
    pipeline.name = 'dummy-pipeline'
    pipeline.work_dir = './'
    pipeline.dump()
    pipeline.dump_yaml()

    print('there should not be any warning after this line')
    BaseEncoder.load_yaml(pipeline.yaml_full_path)
def test_inception_encoding(self):
    """Encode each image batch with the encoder from ``self.inception_yaml`` and check shapes."""
    self.encoder = BaseEncoder.load_yaml(self.inception_yaml)
    feature_dim = 2048

    for batch in self.test_img:
        encoded = self.encoder.encode(batch)
        print("the length of data now is:", len(batch))
        # One output row per input image in the batch.
        self.assertEqual(encoded.shape[0], len(batch))
        self.assertEqual(encoded.shape[1], feature_dim)
def test_mobilenet_encoding(self):
    """Encode each image batch with the encoder from ``self.mobilenet_yaml`` and check shapes."""
    self.encoder = BaseEncoder.load_yaml(self.mobilenet_yaml)
    feature_dim = 1280

    for batch in self.test_img:
        encoded = self.encoder.encode(batch)
        print("the length of data now is:", len(batch))
        # One output row per input image in the batch.
        self.assertEqual(encoded.shape[0], len(batch))
        self.assertEqual(encoded.shape[1], feature_dim)
def test_dump_load(self):
    """Dump ``self.tt_encoder``, reload it and check the encoded text shape."""
    self.tt_encoder.dump(self.dump_path)
    reloaded = BaseEncoder.load(self.dump_path)

    encoded = reloaded.encode(self.test_str)
    hidden_size = 768
    # One output row per input sentence.
    self.assertEqual(encoded.shape[0], len(self.test_str))
    self.assertEqual(encoded.shape[1], hidden_size)
def test_inception_encoding(self):
    """Encode ``self.test_img`` in one call and check the per-item output shape.

    The result must have one entry per input; the first entry must be
    two-dimensional, with as many rows as the first input and 1152 columns.
    """
    self.encoder = BaseEncoder.load_yaml(self.inception_yaml)
    encoded = self.encoder.encode(self.test_img)
    self.assertEqual(len(encoded), len(self.test_img))

    first_shape = encoded[0].shape
    self.assertEqual(len(first_shape), 2)
    self.assertEqual(first_shape[0], self.test_img[0].shape[0])
    self.assertEqual(first_shape[1], 1152)
def test_dummytf(self):
    """Exercise dump/load of DummyTFEncoder and of a PipelineEncoder wrapping two of them.

    Covers: a single encoder round-trip, an unnamed pipeline round-trip
    (the pipeline's own ``is_trained`` stays False while its components are
    trained), and a named, trained pipeline restored both from its binary
    dump and from its YAML.
    """
    # --- single encoders -------------------------------------------------
    enc_a = DummyTFEncoder()
    self.assertEqual(enc_a.encode(1), 2)
    self.assertTrue(enc_a.is_trained)
    enc_a.dump()
    enc_a_loaded = BaseEncoder.load(enc_a.dump_full_path)
    self.assertTrue(enc_a_loaded.is_trained)

    enc_b = DummyTFEncoder()
    self.assertEqual(enc_b.encode(2), 3)
    self.assertTrue(enc_b.is_trained)

    # --- pipeline of the two encoders, not yet flagged as trained ---------
    pipe = PipelineEncoder()
    pipe.component = lambda: [enc_a, enc_b]
    self.assertEqual(pipe.encode(1), 3)
    self.assertFalse(pipe.is_trained)
    for idx in (0, 1):
        self.assertTrue(pipe.component[idx].is_trained)

    pipe.dump()
    pipe_loaded = BaseEncoder.load(pipe.dump_full_path)
    self.assertFalse(pipe_loaded.is_trained)
    for idx in (0, 1):
        self.assertTrue(pipe_loaded.component[idx].is_trained)

    # --- named pipeline, flagged as trained, dumped as YAML and binary ----
    pipe.work_dir = self.dirname
    pipe.name = 'dummy-pipeline'
    pipe.is_trained = True
    pipe.dump_yaml()
    pipe.dump()

    restored = PipelineEncoder.load(pipe.dump_full_path)
    self.assertTrue(restored.is_trained)
    for idx in (0, 1):
        self.assertTrue(restored.component[idx].is_trained)

    restored = PipelineEncoder.load_yaml(pipe.yaml_full_path)
    self.assertTrue(restored.is_trained)
    for idx in (0, 1):
        self.assertTrue(restored.component[idx].is_trained)
    self.assertEqual(restored.encode(4), 6)
def test_pipeline_train(self):
    """Train a three-stage dummy pipeline and check it survives a YAML round-trip.

    ``encode`` is expected to raise before ``train``; afterwards both the
    pipeline and the instance reloaded from its YAML must encode 1 to 4.
    """
    pipeline = PipelineEncoder()
    pipeline.components = lambda: [DummyEncoder() for _ in range(3)]

    with self.assertRaises(RuntimeError):
        pipeline.encode(1)
    pipeline.train(1)
    self.assertEqual(4, pipeline.encode(1))

    pipeline.name = 'dummy-pipeline'
    pipeline.dump()
    pipeline.dump_yaml()

    reloaded = BaseEncoder.load_yaml(pipeline.yaml_full_path)
    self.assertEqual(4, reloaded.encode(1))
def setUp(self):
    """Prepare fixture paths, the YAML-built text encoder and the test sentences.

    ``self.test_str`` ends up holding every non-blank line of
    ``sonnets_small.txt``, stripped of surrounding whitespace.
    """
    here = os.path.dirname(__file__)
    self.dump_path = os.path.join(here, 'model.bin')
    self.text_yaml = os.path.join(here, 'yaml', 'torch-transformers-encoder.yml')
    self.tt_encoder = BaseEncoder.load_yaml(self.text_yaml)

    self.test_str = []
    with open(os.path.join(here, 'sonnets_small.txt')) as sonnets:
        stripped_lines = (raw.strip() for raw in sonnets)
        # Blank lines are dropped; everything else is kept in file order.
        self.test_str.extend(text for text in stripped_lines if text)
def test_load_contrib(self):
    """Load the encoder from ``self.yaml_path`` and check its type and output."""
    from gnes.encoder.base import BaseEncoder, BaseTextEncoder

    encoder = BaseEncoder.load_yaml(self.yaml_path)
    self.assertIsInstance(encoder, BaseTextEncoder)

    result = encoder.encode([])
    self.assertEqual(result, 'hello 531')
def test_vgg_encoding(self):
    """Encode every image batch and check that each result has shape (2, 2048, ...)."""
    self.encoder = BaseEncoder.load_yaml(self.yaml_path)

    expected_rows = 2
    expected_dim = 2048
    for batch in self.test_img:
        encoded = self.encoder.encode(batch)
        self.assertEqual(encoded.shape[0], expected_rows)
        self.assertEqual(encoded.shape[1], expected_dim)
def test_yt8m_encoding(self):
    """Encode ``self.test_video`` with the encoder from ``self.yt8m_yaml`` and check the shape."""
    self.encoder = BaseEncoder.load_yaml(self.yt8m_yaml)
    encoded = self.encoder.encode(self.test_video)

    feature_dim = 19310
    # One output row per input video.
    self.assertEqual(encoded.shape[0], len(self.test_video))
    self.assertEqual(encoded.shape[1], feature_dim)
def test_yt8m_encoding(self):
    """Encode ``self.test_frames`` and check the first two output dimensions are 3 and 19310."""
    self.encoder = BaseEncoder.load_yaml(self.yaml_path)
    encoded = self.encoder.encode(self.test_frames)

    expected_rows = 3
    expected_cols = 19310
    self.assertEqual(encoded.shape[0], expected_rows)
    self.assertEqual(encoded.shape[1], expected_cols)