Example #1
0
 def fitness(self, w):
     """Score word w by how English-like its character n-grams are.

     Each 4-gram of w found in the lexicon contributes 0.2 and each
     3-gram 0.1, weighted by its log-frequency relative to the most
     common n-gram, then scaled by 8 / len(w).
     """
     score4 = sum(
         0.2 * log(lexicon[ch]) / log(lexicon[mostCommon])
         for ch in chngrams(w, 4) if ch in lexicon
     )
     score3 = sum(
         0.1 * log(lexicon[ch]) / log(lexicon[mostCommon])
         for ch in chngrams(w, 3) if ch in lexicon
     )
     return score4 * 8 / len(w) + score3 * 8 / len(w)
Example #2
0
 def vector(self, name): 
     """ Returns a dictionary with character bigrams and suffix.
         For example, "Felix" => {"Fe":1, "el":1, "li":1, "ix":1, "ix$":1, 5:1}
     """
     features = count(chngrams(name, n=2))
     # Mark the final bigram as a suffix feature and add the word length.
     features[name[-2:]+"$"] = 1
     features[len(name)] = 1
     return features
 def vector(self, name):
     """ Returns a dictionary with character bigrams and suffix.
         For example, "Felix" => {"Fe":1, "el":1, "li":1, "ix":1, "ix$":1, 5:1}
     """
     bigrams = chngrams(name, n=2)
     counts = count(bigrams)
     # Two extra features: the last bigram tagged as a suffix, and the length.
     suffix_key = name[-2:] + "$"
     counts[suffix_key] = 1
     counts[len(name)] = 1
     return counts
Example #4
0
 def fitness(self, w):
     # Weighted, normalized log-frequency of w's in-lexicon n-grams,
     # scaled by 8 / len(w): 4-grams weigh 0.2 each, 3-grams 0.1 each.
     def gram_score(n, weight):
         # Contribution of all character n-grams of w present in the lexicon.
         return sum(weight*log(lexicon[ch])/log(lexicon[mostCommon])
                    for ch in chngrams(w, n) if ch in lexicon)
     return gram_score(4, 0.2)*8/len(w) + gram_score(3, 0.1)*8/len(w)
Example #5
0
from pattern.vector import GA, chngrams
# from pattern.en import lexicon
import json
from random import choice
from random import randint as ri
from collections import Counter
from math import log

# Build a character n-gram frequency lexicon from a word list on disk.
with open('words.json', 'r') as infile:
    words = json.load(infile)

# Collect every character 3-gram and 4-gram (with per-word counts).
# NOTE(review): assumes pattern's chngrams() returns a gram -> count mapping
# and that .items() supports '+' (Python 2 list semantics) -- confirm.
allgrams = list()
for w in words:
    char3grams = chngrams(w, 3).items()
    char4grams = chngrams(w, 4).items()
    allgrams.extend(char3grams + char4grams)

# Aggregate per-gram counts across the whole corpus.
lexicon = Counter()
for gram in allgrams:
    lexicon[gram[0]] += gram[1]

# print 'lexicon length:', len(lexicon)

# The single most frequent n-gram; fitness() uses its log-count to normalize.
mostCommon = max(lexicon.keys(), key=lambda k: lexicon[k])
# print mostCommon, lexicon[mostCommon]

def chseq(length=4, chars='abcdefghijklmnopqrstuvwxyz'):
    # Draw `length` random characters from `chars` and join them into a string.
    picked = []
    for _ in range(length):
        picked.append(choice(chars))
    return ''.join(picked)

class Jabberwocky(GA):
 def fitness(self, w):
     # Reward each character n-gram of w that appears in the lexicon:
     # 0.2 per known 4-gram plus 0.1 per known 3-gram.
     score = sum(0.2 for ch in chngrams(w, 4) if ch in lexicon)
     score += sum(0.1 for ch in chngrams(w, 3) if ch in lexicon)
     return score
        exclude=[],  # Filter words in the exclude list.
        stopwords=False,  # Include stop words?
        language='en')  # en, es, de, fr, it, nl
# Print word frequencies (Python 2 dict iteration).
# NOTE(review): freq_dic is not defined in this excerpt -- presumably built
# by an earlier count()/words() call; confirm against the full tutorial.
for k, v in freq_dic.iteritems():
    print k, v
# stop words and stemming
print stem('spies', stemmer=PORTER)
print stem('spies', stemmer=LEMMA)
s = 'The black cat was spying on the white cat.'
print count(words(s), stemmer=PORTER)
print count(words(s), stemmer=LEMMA)
s = 'The black cat was spying on the white cat.'
s = Sentence(parse(s))
print count(s, stemmer=LEMMA)
# character n-grams
print chngrams('The cat sat on the mat.'.lower(), n=3)
# document
# NOTE(review): the continuation lines below use implicit string
# concatenation; there is no space at each join ("again" + "Friday"),
# so words run together in the resulting text -- likely unintended.
text = "The shuttle Discovery, already delayed three times by technical problems and bad weather, was grounded again" \
    "Friday, this time by a potentially dangerous gaseous hydrogen leak in a vent line attached to the shipʼs" \
    "external tank. The Discovery was initially scheduled to make its 39th and final flight last Monday, bearing" \
    "fresh supplies and an intelligent robot for the International Space Station. But complications delayed the" \
    "flight from Monday to Friday, when the hydrogen leak led NASA to conclude that the shuttle would not be ready" \
    "to launch before its flight window closed this Monday."
doc = Document(text, threshold=1)
print doc.keywords(top=6)
document = Document(
    text,
    filter=lambda w: w.lstrip("'").isalnum(),
    punctuation='.,;:!?()[]{}\'`"@#$*+-|=~_',
    top=None,  # Filter words not in the top most frequent.
    threshold=0,  # Filter words whose count falls below threshold.
Example #8
0
def ngram_vector(s, n=3):
    """Return a dict of character n-gram counts for the lowercased string s."""
    return dict(chngrams(s.lower(), n))
Example #9
0
def ngram_vector(s, n=3):
    # Lowercase first so n-grams are case-insensitive, then copy the
    # gram -> count mapping into a plain dict.
    grams = chngrams(s.lower(), n)
    vec = {}
    vec.update(grams)
    return vec
Example #10
0
from pattern.vector import GA, chngrams

# from pattern.en import lexicon
import json
from random import choice
from random import randint as ri
from collections import Counter
from math import log

with open("words.json", "r") as infile:
    words = json.load(infile)

allgrams = list()
for w in words:
    char3grams = chngrams(w, 3).items()
    char4grams = chngrams(w, 4).items()
    allgrams.extend(char3grams + char4grams)

lexicon = Counter()
for gram in allgrams:
    lexicon[gram[0]] += gram[1]

# print 'lexicon length:', len(lexicon)

mostCommon = max(lexicon.keys(), key=lambda k: lexicon[k])
# print mostCommon, lexicon[mostCommon]


def chseq(length=4, chars="abcdefghijklmnopqrstuvwxyz"):
    # Returns a string of random characters.
    return "".join(choice(chars) for i in range(length))