def test_contains_id(self):
    """A freshly created SignIndex must not report any id as contained."""
    dim = 100
    act = 10
    gen = Generator(dim, act)
    sign_index = SignIndex(gen)
    # renamed from `id`, which shadowed the builtin of the same name
    probe_id = 0
    self.assertFalse(sign_index.contains_id(probe_id))
def test_get(self):
    """After adding a sign, get_ri returns a RandomIndex of the configured dimension."""
    dimension = 100
    active = 10
    index = SignIndex(Generator(dimension, active))
    index.add("0")
    random_index = index.get_ri("0")
    self.assertIsInstance(random_index, RandomIndex)
    self.assertEqual(random_index.dim, dimension)
def test_contains(self):
    """Membership reflects sign additions and removals."""
    dimension = 100
    active = 10
    index = SignIndex(generator=Generator(dimension, active))
    index.add("0")
    # only the added sign is present
    self.assertTrue(index.contains("0"))
    self.assertFalse(index.contains("1"))
    # removal makes it absent again
    index.remove("0")
    self.assertFalse(index.contains("0"))
def test_size(self):
    """len() grows on new adds (duplicates ignored) and shrinks on removal."""
    index = SignIndex(generator=Generator(100, 10))
    self.assertEqual(len(index), 0)

    # a new sign grows the index, and nextID advances past its id
    index.add("0")
    self.assertEqual(len(index), 1)
    self.assertEqual(index.nextID, index.get_id("0") + 1)

    # adding the same sign again is a no-op
    index.add("0")
    self.assertEqual(len(index), 1)

    index.add("1")
    self.assertEqual(len(index), 2)

    # removing an element reduces the size by exactly one
    before = len(index)
    index.remove("0")
    after = len(index)
    self.assertEqual(after, before - 1)
def setUp(self):
    """Create the generator/index pair shared by the tests in this case."""
    dimension = 10
    active = 4
    self.generator = Generator(dim=dimension, num_active=active)
    self.sign_index = SignIndex(self.generator)
def setUp(self):
    """Create generator, sign index, and permutation generator fixtures."""
    dimension = 10
    active = 2
    self.generator = Generator(dim=dimension, num_active=active)
    self.sign_index = SignIndex(self.generator)
    self.perm_generator = PermutationGenerator(dim=dimension)
# NOTE(review): this chunk is a whitespace-mangled paste. The bare
# `return dataset` implies an enclosing `def` whose header was lost outside
# this view; the dataset-loading code is reconstructed below as a function.
# Confirm the original function name and signature against the full file.
def load_dataset(home, corpus_file):
    """Open the HDF5 corpus under *home* and return the lemmatised-sentence dataset.

    The h5py.File handle is deliberately left open: the returned dataset
    object reads lazily from it. NOTE(review): nothing here ever closes the
    file — confirm the caller does, or wrap usage in a context manager.
    """
    result_path = home + "/data/results/"  # NOTE(review): unused here — verify
    corpus_file = home + corpus_file
    print("Reading hdf5 dataset from: ", corpus_file)
    dataset_name = "sentences_lemmatised"
    # open hdf5 file and get the dataset
    h5f = h5py.File(corpus_file, 'r')
    dataset = h5f[dataset_name]
    return dataset

# do something with the dataset
# Create Sign RI Index
ri_gen = Generator(dim=ri_dim, num_active=ri_num_active)
sign_index = SignIndex(ri_gen)

max_sentences = 200000


def load_spacy():
    """Load the spaCy English pipeline (tagger + parser, no NER, no vectors) and time it."""
    t0 = time.time()
    # load tokenizer only
    nlp = English(entity=False, load_vectors=False, parser=True, tagger=True)
    t1 = time.time()
    print("Done: {0:.2f} secs ".format(t1 - t0))
    return nlp

nlp = load_spacy()