Python SignIndex Examples

Programming Language: Python

Namespace/Package Name: deepsign.rp.index

Class/Type: SignIndex

Examples at hotexamples.com: 10

Python SignIndex - 10 examples found. These are the top-rated real-world Python examples of deepsign.rp.index.SignIndex, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

SignIndex(7)

add(5)

get_ri(3)

remove(2)

add_all(1)

contains(1)

contains_id(1)

feature_dim(1)

get_id(1)

Example #1

Show file

File: test_sign_index.py Project: davidenunes/deepsign

    def test_contains_id(self):
        """A freshly created index must not report id 0 as present."""
        sign_index = SignIndex(Generator(100, 10))

        # nothing has been added yet, so no id can be known to the index
        missing_id = 0
        self.assertFalse(sign_index.contains_id(missing_id))

Example #2

Show file

File: test_sign_index.py Project: davidenunes/deepsign

    def test_get(self):
        """A sign added to the index yields a RandomIndex of the generator's dimension."""
        n_dims, n_active = 100, 10
        sign_index = SignIndex(Generator(n_dims, n_active))

        sign_index.add("0")
        random_index = sign_index.get_ri("0")

        self.assertIsInstance(random_index, RandomIndex)
        self.assertEqual(random_index.dim, n_dims)

Example #3

Show file

File: test_sign_index.py Project: davidenunes/deepsign

    def test_contains(self):
        """``contains`` is true only for signs that were added and not yet removed."""
        sign_index = SignIndex(generator=Generator(100, 10))

        # only the sign that was added is reported as present
        sign_index.add("0")
        self.assertTrue(sign_index.contains("0"))
        self.assertFalse(sign_index.contains("1"))

        # once removed, the sign is no longer reported
        sign_index.remove("0")
        self.assertFalse(sign_index.contains("0"))

Example #4

Show file

class MyTestCase(unittest.TestCase):
    """Smoke tests that turn the random indexes of indexed signs into a sparse tensor value."""

    def setUp(self):
        """Create a generator (dim=10, num_active=4) and a sign index built on it."""
        self.generator = Generator(dim=10, num_active=4)
        self.sign_index = SignIndex(self.generator)

    def _index_and_fetch(self, signs):
        """Add every sign to the index, then return their random indexes in order."""
        for sign in signs:
            self.sign_index.add(sign)
        return [self.sign_index.get_ri(sign) for sign in signs]

    def test_encode_sp_create(self):
        """Encode two signs with the default settings and print the result."""
        ris = self._index_and_fetch(["A", "B"])

        result = ris_to_sp_tensor_value(ris, self.sign_index.feature_dim())
        print(result)

    def test_encode_sp_positive(self):
        """
        Testing encoding for positive-only sparse random vectors

        """
        ris = self._index_and_fetch(["A", "B"])

        result = ris_to_sp_tensor_value(ris, self.sign_index.feature_dim(), all_positive=True)
        print(result)

Example #5

Show file

 def setUp(self):
     """Create a generator (dim=10, num_active=4) and a sign index built on it."""
     self.generator = Generator(dim=10, num_active=4)
     self.sign_index = SignIndex(self.generator)

Example #6

Show file

File: test_encode.py Project: davidenunes/deepsign

 def setUp(self):
     """Create the generator, sign index and permutation generator used by the tests."""
     n_dims = 10  # shared by the random index generator and the permutation generator
     self.generator = Generator(dim=n_dims, num_active=2)
     self.sign_index = SignIndex(self.generator)
     self.perm_generator = PermutationGenerator(dim=n_dims)

Example #7

Show file

File: test_encode.py Project: davidenunes/deepsign

class TestEncode(unittest.TestCase):
    """Tests for the bag-of-words encoders in ``enc`` backed by a ``SignIndex``."""

    def setUp(self):
        """Create a fresh generator, sign index and permutation generator per test."""
        dim = 10
        act = 2
        self.generator = Generator(dim=dim, num_active=act)
        self.sign_index = SignIndex(self.generator)
        self.perm_generator = PermutationGenerator(dim=dim)

    def test_bow_create(self):
        """One BoW vector is produced per window; repeated signs are indexed once."""
        data = ["A", "B", "A", "C", "A", "B"]

        for s in data:
            self.sign_index.add(s)

        unique_str = set(data)
        self.assertEqual(len(self.sign_index), len(unique_str))

        # The result must not be bound to the name ``windows``: assigning to it
        # inside the method would make it local and the call itself would raise
        # UnboundLocalError.
        data_windows = windows(data, window_size=1)
        vectors = [enc.to_bow(w, self.sign_index) for w in data_windows]
        self.assertEqual(len(vectors), len(data_windows))

    def test_bow_normalise(self):
        """With the target included, the normalised BoW peaks at 1 and the raw one at 2."""
        data = ["A", "A"]

        for s in data:
            self.sign_index.add(s)

        unique_str = set(data)
        self.assertEqual(len(self.sign_index), len(unique_str))

        # Distinct local name, see test_bow_create for the reason.
        data_windows = windows(data, window_size=1)
        first_window = data_windows[0]

        norm_bow = enc.to_bow(first_window, self.sign_index,
                              normalise=True, include_target=True)
        self.assertEqual(np.max(norm_bow), 1)

        unorm_bow = enc.to_bow(first_window, self.sign_index,
                               normalise=False, include_target=True)
        self.assertEqual(np.max(unorm_bow), 2)

    def test_bow_ignore_order(self):
        """The default BoW of the first window is the same for ["A", "B"] and ["B", "A"]."""
        data1 = ["A", "B"]
        data2 = ["B", "A"]

        # Index every sign of both sequences, in the same order as before
        # (A, B, B, A); the previous loop only worked because each list
        # happened to hold exactly two items that could be unpacked.
        for sign in data1 + data2:
            self.sign_index.add(sign)

        windows1 = windows(data1, window_size=1)
        windows2 = windows(data2, window_size=1)

        v1 = enc.to_bow(windows1[0], self.sign_index)
        v2 = enc.to_bow(windows2[0], self.sign_index)

        np_test.assert_array_equal(v1, v2)

        a_ri = self.sign_index.get_ri("A")
        b_ri = self.sign_index.get_ri("B")

        # removing A's vector from the bag must leave exactly B's vector
        np_test.assert_array_equal(v1 - a_ri.to_vector(),
                                   b_ri.to_vector())

    def test_bow_dir_create(self):
        """Directional BoW vectors match when the right context holds the same signs."""
        data1 = ["A", "B", "C"]
        data2 = ["A", "C", "B"]

        # interleave additions from both sequences (A, A, B, C, C, B)
        for s1, s2 in zip(data1, data2):
            self.sign_index.add(s1)
            self.sign_index.add(s2)

        w1 = windows(data1, window_size=2)
        w2 = windows(data2, window_size=2)

        perm = self.perm_generator.matrix()
        v1 = enc.to_bow_dir(w1[0], sign_index=self.sign_index, perm_matrix=perm)
        v2 = enc.to_bow_dir(w2[0], sign_index=self.sign_index, perm_matrix=perm)

        self.assertSetEqual(set(w1[0].right), set(w2[0].right))
        np_test.assert_array_equal(v1, v2)

Example #8

Show file

File: parallel_read_corpus.py Project: davidenunes/deepsign

    result_path = home + "/data/results/"
    corpus_file = home + corpus_file

    print("Reading hdf5 dataset from: ", corpus_file)
    dataset_name = "sentences_lemmatised"

    # open hdf5 file and get the dataset
    h5f = h5py.File(corpus_file, 'r')
    dataset = h5f[dataset_name]
    return dataset

# do something with the dataset

# Sentence limit (presumably a cap on how many sentences get processed; its use is not visible here)
max_sentences = 200000

# Random index generator and the sign index built on top of it
ri_gen = Generator(dim=ri_dim, num_active=ri_num_active)
sign_index = SignIndex(ri_gen)


def load_spacy():
    """Build the ``English`` pipeline, print how long it took, and return it."""
    started = time.time()
    # NOTE(review): the original comment said "load tokenizer only", yet the
    # parser and tagger are switched on below; confirm which one is intended.
    pipeline = English(entity=False, load_vectors=False, parser=True, tagger=True)
    finished = time.time()
    print("Done: {0:.2f} secs ".format(finished - started))
    return pipeline

nlp = load_spacy()

Example #9

Show file

File: test_sign_index.py Project: davidenunes/deepsign

    def test_size(self):
        """``len`` of the index follows additions, ignores duplicates and drops on removal."""
        sign_index = SignIndex(generator=Generator(100, 10))

        # a new index starts out empty
        self.assertEqual(len(sign_index), 0)

        # the first sign grows the index; nextID is one past the id it received
        sign_index.add("0")
        self.assertEqual(len(sign_index), 1)
        self.assertEqual(sign_index.nextID, sign_index.get_id("0") + 1)

        # adding the same sign again leaves the size untouched
        sign_index.add("0")
        self.assertEqual(len(sign_index), 1)

        # a different sign grows the index
        sign_index.add("1")
        self.assertEqual(len(sign_index), 2)

        # removing a sign shrinks the index by exactly one
        expected_size = len(sign_index) - 1
        sign_index.remove("0")
        self.assertEqual(len(sign_index), expected_size)

Example #10

Show file

from deepsign.data.corpora.pipe import BNCPipe
from deepsign.rp.encode import to_bow
from deepsign.rp.index import SignIndex, Generator
from deepsign.data.iterators import chunk_it, windows

# Location of the corpus, relative to the user's home directory.
home = os.getenv("HOME")

data_dir = home + "/data/gold_standards/"
corpus_file = data_dir + "bnc.hdf5"

# Open the HDF5 corpus read-only and select the "sentences" dataset.
corpus_hdf5 = h5py.File(corpus_file, 'r')
corpus_dataset = corpus_hdf5["sentences"]

# Feed the dataset to the BNC pipeline in chunks.
n_rows = 1000
sentences = chunk_it(corpus_dataset, n_rows=n_rows, chunk_size=100000)
pipeline = BNCPipe(datagen=sentences, lemmas=True)

# Sign index backed by a random index generator.
ri_gen = Generator(1000, 10)
index = SignIndex(ri_gen)

for s in tqdm(pipeline, total=n_rows):
    index.add_all(s)

    # Keep the imported ``windows`` function intact: binding its result to the
    # same name would replace the function at module scope, so the call would
    # fail from the second sentence onwards.
    sentence_windows = windows(s, window_size=2)

    for window in sentence_windows:
        bow = to_bow(window, index, include_target=False, normalise=True)