def build_vocab(self, corpus):
    """Build the model vocabulary.

    If a frequency map was supplied up front (``self.vocabulary_counts``),
    build the vocabulary directly from it; otherwise fall back to the
    parent class's default vocabulary construction over *corpus*.

    Side effects: populates ``self.vocab`` and ``self.index2word``, builds
    the Huffman tree (hierarchical softmax) and/or the negative-sampling
    table depending on configuration, precalculates downsampling
    thresholds, and resets the model weights.
    """
    # `is not None` — identity check is the correct test for None,
    # and a provided-but-empty counts dict should still take this branch.
    if self.vocabulary_counts is not None:
        logger.debug("building vocabulary from provided frequency map")
        vocab = self.vocabulary_counts
    else:
        logger.debug("default vocabulary building")
        super(Skipgram, self).build_vocab(corpus)
        return

    # Assign a unique, dense index to each word that meets min_count.
    # .items() (not Py2-only .iteritems()) keeps this portable.
    self.vocab, self.index2word = {}, []
    for word, count in vocab.items():
        v = Vocab()
        v.count = count
        if v.count >= self.min_count:
            v.index = len(self.vocab)
            self.index2word.append(word)
            self.vocab[word] = v
    # Lazy %-style args: formatting is skipped entirely when DEBUG is off.
    logger.debug("total %i word types after removing those with count<%s",
                 len(self.vocab), self.min_count)

    if self.hs:
        # add info about each word's Huffman encoding
        self.create_binary_tree()
    if self.negative:
        # build the table for drawing random words (for negative sampling)
        self.make_table()
    # precalculate downsampling thresholds
    self.precalc_sampling()
    self.reset_weights()
def build_vocab(self, corpus):
    """Build the model vocabulary.

    If a frequency map was supplied up front (``self.vocabulary_counts``),
    build the vocabulary directly from it; otherwise fall back to the
    parent class's default vocabulary construction over *corpus*.

    Side effects: populates ``self.vocab``, ``self.index2word``,
    ``self.corpus_count`` and ``self.raw_vocab``, then runs gensim's
    ``scale_vocab`` / ``finalize_vocab`` steps.
    """
    # `is not None` — identity check is the correct test for None,
    # and a provided-but-empty counts dict should still take this branch.
    if self.vocabulary_counts is not None:
        # logger.debug (not a bare `print`): consistent with the rest of
        # the module's logging and portable across Python versions.
        logger.debug("building vocabulary from provided frequency map")
        vocab = self.vocabulary_counts
    else:
        logger.debug("default vocabulary building")
        super(Skipgram, self).build_vocab(corpus)
        return

    # Assign a unique, dense index to each word that meets min_count.
    # .items() (not Py2-only .iteritems()) keeps this portable.
    self.vocab, self.index2word = {}, []
    for word, count in vocab.items():
        v = Vocab()
        v.count = count
        if v.count >= self.min_count:
            v.index = len(self.vocab)
            self.index2word.append(word)
            self.vocab[word] = v
    # Keep the raw counts around — scale_vocab/finalize_vocab read these.
    self.corpus_count = len(vocab)
    self.raw_vocab = vocab

    # Lazy %-style args: formatting is skipped entirely when DEBUG is off.
    logger.debug("total %i word types after removing those with count<%s",
                 len(self.vocab), self.min_count)
    self.scale_vocab()
    self.finalize_vocab()