def createWordsObjects(self, dataDict, language, translateTo):
    """Build one ``Words`` object for every entry of ``dataDict``.

    Each key is converted with ``int()`` and decides how ``Words`` is called:

    * key below 7    -> ``Words(source, target)``
    * key from 7-11  -> ``Words(source, target, 1)``
    * key 12 or more -> ``Words(source, target, 2)``

    ``source`` is ``dataDict[key][language]`` and ``target`` is
    ``dataDict[key][translateTo]``.

    Args:
        dataDict: Mapping whose keys are accepted by ``int()`` and whose
            values can be indexed by ``language`` and ``translateTo``.
        language: Index of the source text inside each entry.
        translateTo: Index of the translated text inside each entry.

    Returns:
        list: The created ``Words`` objects, in the order the keys are
        yielded by ``dataDict.keys()``.
    """
    # NOTE(review): the optional third argument (1 / 2) looks like a
    # difficulty or level tier -- confirm against the Words constructor.
    created = []
    for key in dataDict.keys():
        rank = int(key)
        if rank >= 12:
            tier_args = (2,)
        elif rank >= 7:
            tier_args = (1,)
        else:
            # Lowest band: rely on whatever default Words itself provides.
            tier_args = ()
        entry = dataDict[key]
        created.append(Words(entry[language], entry[translateTo], *tier_args))
    return created
def __init__(self):
    """Pick a random feeling, a different opposite feeling and a read rate.

    Attributes set:
        feeling: One label chosen at random from the five emotion labels.
        opposite: A second label chosen at random from the remaining four.
        read_rate: Random integer from 1 to 10 inclusive.
        sentences: Starts as an empty list.
        words: ``Words`` object built from ``feeling`` and ``opposite``.
        markov_blob: Starts as ``None``.
    """
    moods = ["Angry", "Fear", "Happy", "Sad", "Surprise"]
    self.feeling = random.choice(moods)

    # The opposite is drawn only from the labels not chosen as the feeling,
    # so the two attributes can never be equal.
    remaining = [mood for mood in moods if mood != self.feeling]
    self.opposite = random.choice(remaining)

    self.read_rate = random.randint(1, 10)  # number of sentences to read
    # self.current_power = random.randint(0,30)  # starting power?

    self.sentences = []
    self.words = Words(self.feeling, self.opposite)
    self.markov_blob = None
def __init__(self): # initialize all "global" data logger.debug('loading...') logger.debug(' corpus...') # FIXME: using absolute paths is the easiest way to make us work from cmdline and invoked # in a web app. perhaps we could set up softlinks in /var/ to make this slightly more respectable. self.g = GramsBin( '/home/pizza/proj/spill-chick/data/corpus/google-ngrams/word.bin', '/home/pizza/proj/spill-chick/data/corpus/google-ngrams/ngram3.bin' ) self.w = Words(NGram3BinWordCounter(self.g.ng)) logger.debug(' phon') self.p = Phon(self.w, self.g) logger.debug('done.') # sanity-check junk """
t = ' '.join(snd[:j]) words = self.phon.get(t) if words: for s in self.soundsToWords(snd[j:]): yield [words] + s if __name__ == '__main__': def words(str): return re.findall('[a-z\']+', str.lower()) def pron(wl, wd): print(' '.join( [str(wd[w][0]) if w in wd else '<%s>' % (w, ) for w in wl])) P = Phon(Words()) for a in sys.argv[1:]: pron(words(a), P.W) print(P.word['there']) print(P.phon[P.word['there'][0]]) P.phraseSound(['making', 'mistake']) P.phraseSound(['may', 'king', 'mist', 'ache']) x = P.phraseSound(['making', 'miss', 'steak']) from itertools import product for f in P.soundsToWords(x): print(f) #print(list(product(*f)))
if x == y: return 0 damlev = ngd.diff.damlev sx, sy = p.phraseSound([x]), p.phraseSound([y]) if sx == sy and sx: # sound the same, e.g. there/their. consider these equal. return damlev # otherwise, calculate phonic/edit difference return max(damlev, min(NGramDiffScore.overlap(sx, sy), abs(len(x) - len(y)))) if __name__ == '__main__': import sys sys.path.append('..') from grambin import GramsBin from word import Words, NGram3BinWordCounter from phon import Phon import logging logging.basicConfig(stream=sys.stderr, level=logging.DEBUG) logging.debug('loading...') g = GramsBin( '/home/pizza/proj/spill-chick/data/corpus/google-ngrams/word.bin', '/home/pizza/proj/spill-chick/data/corpus/google-ngrams/ngram3.bin') w = Words(NGram3BinWordCounter(g.ng)) p = Phon(w, g) logging.debug('loaded.') pass