Ejemplos de phoc en Python, ejemplos de src.util.phoc_util.phoc en Python

Ejemplo n.º 1

0

Mostrar archivo

 def phoc_list(self):
     """
     Encode every word of this dataset as a PHOC.

     Each entry of self.word_list is passed to phoc_util.phoc together with
     self.alphabet and self.phoc_levels.

     :return: list of PHOC, one per word, in the order of self.word_list
     """
     encoded = []
     for word in self.word_list:
         encoded.append(
             phoc_util.phoc(word=word,
                            alphabet=self.alphabet,
                            levels=self.phoc_levels))
     return encoded

Ejemplo n.º 2

0

Mostrar archivo

Archivo: cca.py Proyecto: mspringe/Attribute-CNNs-fuer-die-Einzelworterkennung

 def words(self, words):
     """
     Store the given words and refresh the respective PHOC as well.

     The words are copied into a list; self.phoc ends up as a numpy array
     holding one float PHOC per stored word.

     :param words: iterable of words
     """
     self._words = list(words)
     encodings = []
     for entry in self._words:
         encoding = phoc_util.phoc(word=entry,
                                   alphabet=self.__alphabet,
                                   levels=self.__phoc_level)
         encodings.append(encoding.astype(float))
     self.phoc = encodings
     self.phoc = np.array(self.phoc)

Ejemplo n.º 3

0

Mostrar archivo

    def words(self, new_words):
        """
        Set the lexicon and recalculate the respective PHOC.

        As this estimator is solely based on the distances to the lexicon's PHOC, we have to ensure their sanity
        and warn if impurities occur:

        * words whose PHOC sums to zero are reported via a warning and removed from the lexicon
        * words that share an identical PHOC are reported via a warning and collected in self.ambiguous
        * if self.unambiguous is truthy, every word that duplicates the PHOC of an earlier word is removed
          (the first word of each group of identical PHOC is kept)

        .. note::

            self.ambiguous contains problematic words. It is appended to before being reassigned, so it has to
            exist already when this method runs.

        :param new_words: New lexicon
        """
        # updating the lexicon (going through a set drops duplicate words)
        self.__words = np.array(list(set(new_words)))
        # updating the PHOC
        self.phoc = [phoc_util.phoc(word=w, alphabet=self.__alphabet, levels=self.__phoc_level)
                      for w in self.__words]
        # eliminating zero PHOC vectors (those would inherently be the nearest neighbour for the cosine distance and
        # we shall only consider words that we can generate a representation for)
        self.phoc = np.array(self.phoc)
        # one sum per word; NOTE(review): assumes a non-empty lexicon, so that self.phoc is 2-dimensional -- confirm
        sums = self.phoc.sum(axis=1)
        if any(sums == 0):
            warnings.warn('{} zero phocs\n{}'.format((sums == 0).sum(), self.__words[sums==0]))
        self.phoc = self.phoc[sums > 0]
        self.__words = self.__words[sums > 0]
        # checking for ambiguous PHOC and warn
        # same_taken[j] == 1 once word j has been found to share its PHOC with an earlier word
        same_taken = np.zeros(self.phoc.shape[0], dtype=int)
        # entries are (word, array of later words with the same PHOC)
        same_pairs = []
        # number of groups of identical PHOC found
        same = 0
        for i in range(len(self.phoc)-1):
            # indices <= i are never marked in this iteration, only the words after i are compared
            zs = np.zeros(i + 1, dtype=int)
            # 1 for each later word whose PHOC equals the PHOC of word i in every position
            tail = np.array(list(map(all, self.phoc[i] == self.phoc[i + 1:]))).astype(int)
            tmp_same = np.concatenate([zs, tail])
            # ignore words that have already been assigned to an earlier group
            tmp_same -= same_taken
            tmp_same[tmp_same < 0] = 0
            # pairs of words with identical PHOC
            if tmp_same.sum() > 0:
                same += 1
                same_pairs.append((self.__words[i], self.__words[tmp_same.astype(bool)]))
            # remember the newly found duplicates, keeping same_taken a 0/1 mask
            same_taken += tmp_same
            same_taken[same_taken > 1] = 1
        # warning about ambiguous PHOC
        if same > 0:
            warnings.warn('{} same phocs out of {}\n{}'.format(same, len(self.phoc), same_pairs))
        # gathering ambiguous words, if this set is too large, you might want to use deeper PHOC (more levels)
        for w, pair in same_pairs:
            self.ambiguous.append(w)
            for v in pair:
                self.ambiguous.append(v)
        # removing duplicate entries from the collected ambiguous words
        self.ambiguous = list(set(self.ambiguous))
        # discarding ambiguous PHOC if desired: only the words marked in same_taken are dropped, i.e. the first
        # word of each group of identical PHOC stays in the lexicon
        if self.unambiguous:
            self.__words = self.__words[~same_taken.astype(bool)]
            self.phoc = self.phoc[~same_taken.astype(bool)]

Ejemplo n.º 4

0

Mostrar archivo

Archivo: prob.py Proyecto: mspringe/Attribute-CNNs-fuer-die-Einzelworterkennung

    def words(self, new_words):
        """
        Replace the lexicon; the PHOC have to be updated along with it.

        self.train_data receives one PHOC per word of the new lexicon and
        self._words keeps the handed lexicon object itself.

        :param new_words:  new lexicon
        """
        # rebuilding the PHOC-table before storing the lexicon
        phoc_table = []
        for entry in new_words:
            phoc_table.append(
                phoc_util.phoc(word=entry,
                               alphabet=self.__alphabet,
                               levels=self.__phoc_level))
        self.train_data = phoc_table
        self._words = new_words

Ejemplo n.º 5

0

Mostrar archivo

    def phoc(self, idx):
        """
        Generates the PHOC of the item at the given index. The PHOC depends on self.alphabet and self.phoc_levels.
        (see also :func:`src.util.phoc_util.phoc`)

        :param idx: index of item
        :return: respective PHOC
        """
        # the transcript of the item is the word that gets encoded
        word = self.transcript(idx)
        return phoc_util.phoc(word, alphabet=self.alphabet, levels=self.phoc_levels)

Ejemplo n.º 6

0

Mostrar archivo

Archivo: prob.py Proyecto: mspringe/Attribute-CNNs-fuer-die-Einzelworterkennung

    def __init__(self,
                 words,
                 phoc_level=phoc_util.DEFAULT_PHOC_LEVELS,
                 alphabet=None):
        """
        The parent class is initialized with the handed words and their PHOC encodings,
        afterwards the words are assigned to self.words.

        :param words: words in dictionary
        :param phoc_level: levels of PHOC-encoding
        :param alphabet: alphabet used for PHOC (see :class:`phoc_util.Alphabet`), defaults to
                         lower case ASCII characters, ASCII digits and ASCII punctuation if None
        """
        if alphabet is None:
            # building the default per call, a list literal in the signature would be one object shared by
            # all instances, hence modifying it through one instance would affect every other instance
            alphabet = [
                Alphabet.ASCII_LOWER, Alphabet.ASCII_DIGITS,
                Alphabet.ASCII_PUNCTUATION
            ]
        self.__phoc_level = phoc_level
        self.__alphabet = alphabet
        super().__init__(words, [
            phoc_util.phoc(
                word=w, alphabet=self.__alphabet, levels=self.__phoc_level)
            for w in words
        ])
        # NOTE(review): if words is a property with a setter, this assignment may calculate the PHOC
        # a second time -- confirm against the class definition
        self.words = words

Ejemplo n.º 7

0

Mostrar archivo

Archivo: phoc_util.py Proyecto: mspringe/Attribute-CNNs-fuer-die-Einzelworterkennung

 def test_phoc(self):
     """Compare phoc_util.phoc with a PHOC built by hand from the whole word and its two halves."""
     alphabet = [
         phoc_util.Alphabet.ASCII_LOWER, phoc_util.Alphabet.ASCII_UPPER,
         phoc_util.Alphabet.ASCII_DIGITS,
         phoc_util.Alphabet.ASCII_PUNCTUATION
     ]
     chars = phoc_util.alphabet_chars(alphabet)
     word = 'aAzZ19.,'
     middle = len(word) // 2

     def occurrences(segment):
         # binary vector over chars, 1 for every character contained in the segment
         vector = np.zeros(len(chars), dtype=np.uint8)
         for char in segment:
             vector[chars.index(char)] = 1
         return vector

     # building PHOC manually: whole word first, then first half and second half
     expected = np.concatenate((occurrences(word),
                                occurrences(word[:middle]),
                                occurrences(word[middle:])))
     # test
     actual = phoc_util.phoc(word=word, alphabet=alphabet, levels=2)
     self.assertEqual(expected.dtype, actual.dtype)
     np.testing.assert_array_equal(expected, actual)

Ejemplo n.º 8

0

Mostrar archivo

Archivo: cca.py Proyecto: mspringe/Attribute-CNNs-fuer-die-Einzelworterkennung

 def setUp(self):
     """Prepare a three-word lexicon, its PHOC and an RCCAEstimator built from the words."""
     lexicon = ['cat', 'dog', 'fox']
     self.words = lexicon
     self.phocs = list(map(phoc, self.words))
     self.rcca = cca.RCCAEstimator(self.words)

Ejemplo n.º 9

0

Mostrar archivo

 def test_estimate(self):
     """The PHOC of 'cat' has to be estimated as the word 'cat'."""
     # a single query, given as the PHOC of a known word
     phoc_queries = [phoc('cat')]
     expected_words = ['cat']
     estimated_words = self.prm.estimate_set(phoc_queries)
     self.assertEqual(estimated_words, expected_words)