Example no. 1
0
    def test_contains_id(self):
        """contains_id must return False for an id that was never added to the index."""
        dim = 100
        act = 10

        gen = Generator(dim, act)
        sign_index = SignIndex(gen)

        # renamed from `id`, which shadowed the builtin `id()`
        elem_id = 0
        self.assertFalse(sign_index.contains_id(elem_id))
Example no. 2
0
    def test_get(self):
        """After adding a key, get_ri returns a RandomIndex whose dimension matches the generator's."""
        dimension = 100
        num_active = 10

        generator = Generator(dimension, num_active)
        index = SignIndex(generator)

        index.add("0")
        retrieved = index.get_ri("0")

        # the stored entry is a RandomIndex with the configured dimensionality
        self.assertIsInstance(retrieved, RandomIndex)
        self.assertEqual(retrieved.dim, dimension)
Example no. 3
0
    def test_contains(self):
        """contains reflects membership: True after add, False for absent keys and after remove."""
        dimension = 100
        num_active = 10

        generator = Generator(dimension, num_active)
        index = SignIndex(generator=generator)

        index.add("0")

        # only the added key is reported as present
        self.assertTrue(index.contains("0"))
        self.assertFalse(index.contains("1"))

        # removal makes the key absent again
        index.remove("0")
        self.assertFalse(index.contains("0"))
Example no. 4
0
    def test_size(self):
        """len() tracks the index: grows on add, ignores duplicates, shrinks on remove."""
        generator = Generator(100, 10)
        index = SignIndex(generator=generator)

        # empty index has size 0
        self.assertEqual(len(index), 0)

        # first add bumps the size and advances the id counter past the new id
        index.add("0")
        self.assertEqual(len(index), 1)
        self.assertEqual(index.nextID, index.get_id("0") + 1)

        # re-adding the same key is a no-op for size
        index.add("0")
        self.assertEqual(len(index), 1)

        # a distinct key grows the index again
        index.add("1")
        self.assertEqual(len(index), 2)

        # removing an existing key shrinks the index by exactly one
        before = len(index)
        index.remove("0")
        after = len(index)
        self.assertEqual(after, before - 1)
Example no. 5
0
 def setUp(self):
     """Build a fresh random-index generator and sign index for each test."""
     dimension = 10
     active = 4
     self.generator = Generator(dim=dimension, num_active=active)
     self.sign_index = SignIndex(self.generator)
Example no. 6
0
 def setUp(self):
     """Build a fresh generator, sign index, and permutation generator for each test."""
     dimension = 10
     active = 2
     self.generator = Generator(dim=dimension, num_active=active)
     self.sign_index = SignIndex(self.generator)
     self.perm_generator = PermutationGenerator(dim=dimension)
    result_path = home + "/data/results/"
    corpus_file = home + corpus_file

    print("Reading hdf5 dataset from: ", corpus_file)
    dataset_name = "sentences_lemmatised"

    # open hdf5 file and get the dataset
    h5f = h5py.File(corpus_file, 'r')
    dataset = h5f[dataset_name]
    return dataset

# do something with the dataset

# Create Sign RI Index
# NOTE(review): ri_dim and ri_num_active must be defined earlier in this
# script — confirm their values against the experiment configuration.
ri_gen = Generator(dim=ri_dim, num_active=ri_num_active)
sign_index = SignIndex(ri_gen)

# Upper bound on the number of sentences processed downstream.
max_sentences = 200000


def load_spacy():
    """Construct the spaCy English pipeline, print the load time, and return it.

    NOTE(review): the original comment claimed "tokenizer only", but
    parser=True and tagger=True enable those components too — confirm
    which pipeline pieces are actually required.
    """
    start = time.time()
    nlp = English(entity=False, load_vectors=False, parser=True, tagger=True)
    elapsed = time.time() - start
    print("Done: {0:.2f} secs ".format(elapsed))
    return nlp

# Load the spaCy pipeline once at module import time (slow; prints timing).
nlp = load_spacy()