Python phoc 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: src.util.phoc_util

메소드/함수: phoc

hotexamples.com에서의 예제들: 9

Python phoc - 9개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, Python src.util.phoc_util.phoc의 평가가 가장 높은 실제 사용 예제입니다. 각 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

 def phoc_list(self):
     """
     Encode every entry of ``self.word_list`` as a PHOC.

     The encoding uses ``self.alphabet`` and ``self.phoc_levels``.

     :return: list with one PHOC per word, in the order of ``self.word_list``
     """
     encodings = []
     for entry in self.word_list:
         encodings.append(
             phoc_util.phoc(word=entry,
                            alphabet=self.alphabet,
                            levels=self.phoc_levels))
     return encodings

예제 #2

파일 보기

파일: cca.py 프로젝트: mspringe/Attribute-CNNs-fuer-die-Einzelworterkennung

 def words(self, words):
     """
     Store the lexicon and refresh the PHOC that belong to it.

     ``self._words`` receives a list copy of ``words``; ``self.phoc`` ends up as an
     array holding one float-typed PHOC per stored word.

     :param words: iterable of words forming the new lexicon
     """
     self._words = list(words)
     encodings = []
     for entry in self._words:
         vector = phoc_util.phoc(word=entry,
                                 alphabet=self.__alphabet,
                                 levels=self.__phoc_level)
         encodings.append(vector.astype(float))
     # stored first as a plain list, then replaced by the stacked array
     self.phoc = encodings
     self.phoc = np.array(self.phoc)

예제 #3

파일 보기

    def words(self, new_words):
        """
        Replace the lexicon and recompute the PHOC of every word in it.

        As this estimator is solely based on the distances to the lexicon's PHOC, their sanity is checked here:
        words whose PHOC sums to zero are dropped, and words sharing an identical PHOC are collected.
        Both situations emit a warning.

        .. note::

            self.ambiguous collects the words that share their PHOC with at least one other word.
            It is appended to, so it is assumed to already exist as a list -- TODO confirm where it is initialised.

        :param new_words: New lexicon (duplicates are removed through ``set``, so the stored order is not tied
                          to the input order)
        """
        # updating the lexicon (deduplicated, stored as an array so it can be indexed with boolean masks below)
        self.__words = np.array(list(set(new_words)))
        # updating the PHOC
        self.phoc = [phoc_util.phoc(word=w, alphabet=self.__alphabet, levels=self.__phoc_level)
                      for w in self.__words]
        # eliminating zero PHOC vectors (those would inherently be the nearest neighbour for the cosine distance, and
        # we shall only consider words that we can generate a representation for)
        self.phoc = np.array(self.phoc)
        sums = self.phoc.sum(axis=1)
        if any(sums == 0):
            warnings.warn('{} zero phocs\n{}'.format((sums == 0).sum(), self.__words[sums==0]))
        # the same mask is applied to PHOC and words, keeping both aligned index by index
        self.phoc = self.phoc[sums > 0]
        self.__words = self.__words[sums > 0]
        # checking for ambiguous PHOC and warn
        # same_taken[j] becomes 1 once word j has been matched as a duplicate of an earlier word
        same_taken = np.zeros(self.phoc.shape[0], dtype=int)
        # entries are (word, array of later words with an identical PHOC)
        same_pairs = []
        # number of words that have at least one not-yet-flagged later duplicate
        same = 0
        for i in range(len(self.phoc)-1):
            # positions 0..i are padded with zeros, so only words after i are compared against word i
            zs = np.zeros(i + 1, dtype=int)
            # 1 for every later row that equals row i in all components
            tail = np.array(list(map(all, self.phoc[i] == self.phoc[i + 1:]))).astype(int)
            tmp_same = np.concatenate([zs, tail])
            # drop matches that were already flagged in an earlier iteration, so each duplicate is reported once
            tmp_same -= same_taken
            tmp_same[tmp_same < 0] = 0
            # pairs of words with identical PHOC
            if tmp_same.sum() > 0:
                same += 1
                same_pairs.append((self.__words[i], self.__words[tmp_same.astype(bool)]))
            # remember the newly found duplicates; values are clamped so the flags stay 0/1
            same_taken += tmp_same
            same_taken[same_taken > 1] = 1
        # warning about ambiguous PHOC
        if same > 0:
            warnings.warn('{} same phocs out of {}\n{}'.format(same, len(self.phoc), same_pairs))
        # gathering ambiguous words, if this set is too large, you might want to use deeper PHOC (more levels)
        for w, pair in same_pairs:
            self.ambiguous.append(w)
            for v in pair:
                self.ambiguous.append(v)
        # removing repeated entries (this also rebuilds the list in set order)
        self.ambiguous = list(set(self.ambiguous))
        # if desired, discarding the words flagged as duplicates, so that only the first word of each group of
        # identical PHOC remains
        if self.unambiguous:
            self.__words = self.__words[~same_taken.astype(bool)]
            self.phoc = self.phoc[~same_taken.astype(bool)]

예제 #4

파일 보기

파일: prob.py 프로젝트: mspringe/Attribute-CNNs-fuer-die-Einzelworterkennung

    def words(self, new_words):
        """
        Replace the lexicon and rebuild the PHOC table kept in ``self.train_data``.

        :param new_words: new lexicon; one PHOC is computed per entry, in iteration order
        """
        # rebuilding the PHOC table
        phoc_table = []
        for entry in new_words:
            phoc_table.append(
                phoc_util.phoc(word=entry,
                               alphabet=self.__alphabet,
                               levels=self.__phoc_level))
        self.train_data = phoc_table
        # the handed lexicon object itself is stored, not a copy
        self._words = new_words

예제 #5

파일 보기

    def phoc(self, idx):
        """
        Build the PHOC of the transcription stored for an item.

        The encoding is parameterised by the instance attributes ``self.alphabet`` and ``self.phoc_levels``
        (see also :func:`src.util.phoc_util.phoc`).

        :param idx: index of the item
        :return: PHOC of the item's transcription
        """
        return phoc_util.phoc(self.transcript(idx),
                              alphabet=self.alphabet,
                              levels=self.phoc_levels)

예제 #6

파일 보기

파일: prob.py 프로젝트: mspringe/Attribute-CNNs-fuer-die-Einzelworterkennung

    def __init__(self,
                 words,
                 phoc_level=phoc_util.DEFAULT_PHOC_LEVELS,
                 alphabet=None):
        """
        The parent class is initialised with the words and their PHOC encodings;
        afterwards the words are assigned to ``self.words``.

        :param words: words in dictionary
        :param phoc_level: levels of PHOC-encoding
        :param alphabet: alphabet used for PHOC (see :class:`phoc_util.Alphabet`); when omitted (``None``),
                         lower-case ASCII letters, ASCII digits and ASCII punctuation are used
        """
        if alphabet is None:
            # built per call: a list literal in the signature would be one shared object that every instance
            # created with the default stores a reference to
            alphabet = [
                Alphabet.ASCII_LOWER, Alphabet.ASCII_DIGITS,
                Alphabet.ASCII_PUNCTUATION
            ]
        # the encoding parameters must be set before any PHOC is computed below
        self.__phoc_level = phoc_level
        self.__alphabet = alphabet
        super().__init__(words, [
            phoc_util.phoc(
                word=w, alphabet=self.__alphabet, levels=self.__phoc_level)
            for w in words
        ])
        # NOTE(review): presumably routed through a ``words`` property setter of this class -- confirm
        self.words = words

예제 #7

파일 보기

파일: phoc_util.py 프로젝트: mspringe/Attribute-CNNs-fuer-die-Einzelworterkennung

 def test_phoc(self):
     """
     Compare ``phoc_util.phoc`` with a hand-built two-level PHOC.

     The expected vector is the character-occurrence vector of the whole word,
     followed by the vectors of its first and second half.
     """
     alphabet = [
         phoc_util.Alphabet.ASCII_LOWER,
         phoc_util.Alphabet.ASCII_UPPER,
         phoc_util.Alphabet.ASCII_DIGITS,
         phoc_util.Alphabet.ASCII_PUNCTUATION,
     ]
     chars = phoc_util.alphabet_chars(alphabet)
     word = 'aAzZ19.,'
     middle = len(word) // 2

     def occurrence_vector(segment):
         # one slot per alphabet character, set to 1 if the character occurs in the segment
         vector = np.zeros(len(chars), dtype=np.uint8)
         for symbol in segment:
             vector[chars.index(symbol)] = 1
         return vector

     # building PHOC manually: whole word, first half, second half
     expected = np.concatenate((
         occurrence_vector(word),
         occurrence_vector(word[:middle]),
         occurrence_vector(word[middle:]),
     ))
     # test
     actual = phoc_util.phoc(word=word, alphabet=alphabet, levels=2)
     self.assertEqual(expected.dtype, actual.dtype)
     np.testing.assert_array_equal(expected, actual)

예제 #8

파일 보기

파일: cca.py 프로젝트: mspringe/Attribute-CNNs-fuer-die-Einzelworterkennung

 def setUp(self):
     """Prepare a three-word lexicon, the PHOC of its words and an RCCA estimator built from it."""
     lexicon = ['cat', 'dog', 'fox']
     self.words = lexicon
     self.phocs = list(map(phoc, lexicon))
     self.rcca = cca.RCCAEstimator(lexicon)

예제 #9

파일 보기

 def test_estimate(self):
     """The estimate for the PHOC of 'cat' is expected to be the word 'cat'."""
     expected_words = ['cat']
     encoded_query = [phoc('cat')]
     # estimate and check for results
     self.assertEqual(self.prm.estimate_set(encoded_query), expected_words)