Esempio n. 1
0
	def createWordsObjects(self, dataDict, language, translateTo):
		"""Build one ``Words`` object per entry of *dataDict*.

		Each key is converted with ``int()`` and selects how ``Words`` is called:

		* key < 7        -> ``Words(source, target)``
		* 7 <= key < 12  -> ``Words(source, target, 1)``
		* key >= 12      -> ``Words(source, target, 2)``

		where ``source`` is ``dataDict[key][language]`` and ``target`` is
		``dataDict[key][translateTo]``.
		NOTE(review): the meaning of the third ``Words`` argument is not visible
		here -- presumably a level/difficulty tier; confirm against ``Words``.

		Returns the objects as a list, in the iteration order of *dataDict*.
		"""
		def build(key):
			rank = int(key)
			# Keys below 7 rely on the default of Words' third parameter.
			if rank < 7:
				extra = ()
			elif rank < 12:
				extra = (1,)
			else:
				extra = (2,)
			entry = dataDict[key]
			return Words(entry[language], entry[translateTo], *extra)

		return [build(key) for key in dataDict]
Esempio n. 2
0
 def __init__(self):
     """Set up a reader with a random feeling, a different opposite, and a read rate.

     ``feeling`` is picked at random from five emotions, ``opposite`` is picked
     from the remaining four, and ``read_rate`` is a random integer in 1..10
     (the number of sentences to read). ``sentences`` starts empty,
     ``words`` is ``Words(feeling, opposite)`` and ``markov_blob`` is None.
     """
     emotions = ("Angry", "Fear", "Happy", "Sad", "Surprise")

     chosen = random.choice(emotions)
     self.feeling = chosen

     # The opposite is drawn only from the emotions that were not chosen.
     others = [name for name in emotions if name != chosen]
     self.opposite = random.choice(others)

     self.read_rate = random.randint(1, 10)
     self.sentences = []
     self.words = Words(self.feeling, self.opposite)
     self.markov_blob = None
Esempio n. 3
0
 def __init__(self):
     """Load the n-gram corpus and build the word and phonetic helpers.

     Progress is reported through ``logger.debug``. After the statements
     below have run, the instance holds:

     * ``self.g`` -- a ``GramsBin`` built from two hard-coded absolute paths
     * ``self.w`` -- ``Words`` wrapping ``NGram3BinWordCounter(self.g.ng)``
     * ``self.p`` -- ``Phon`` built from ``self.w`` and ``self.g``
     """
     # initialize all "global" data
     logger.debug('loading...')
     logger.debug('  corpus...')
     # FIXME: using absolute paths is the easiest way to make us work from cmdline and invoked
     # in a web app. perhaps we could set up softlinks in /var/ to make this slightly more respectable.
     self.g = GramsBin(
         '/home/pizza/proj/spill-chick/data/corpus/google-ngrams/word.bin',
         '/home/pizza/proj/spill-chick/data/corpus/google-ngrams/ngram3.bin'
     )
     # The word list is derived from the n-gram data loaded just above.
     self.w = Words(NGram3BinWordCounter(self.g.ng))
     logger.debug('  phon')
     self.p = Phon(self.w, self.g)
     logger.debug('done.')
     # sanity-check junk
     """
Esempio n. 4
0
            t = ' '.join(snd[:j])
            words = self.phon.get(t)
            if words:
                for s in self.soundsToWords(snd[j:]):
                    yield [words] + s


if __name__ == '__main__':
    # Ad-hoc command-line demo: print a pronunciation for every word of each
    # argument, then exercise the phrase-sound helpers on a few sample phrases.

    def words(text):
        """Return the lowercase runs of letters/apostrophes found in *text*."""
        lowered = text.lower()
        return re.findall('[a-z\']+', lowered)

    def pron(wl, wd):
        """Print one token per word of *wl*, joined by spaces.

        A word present in *wd* is shown as ``str(wd[word][0])``; any other
        word is shown as ``<word>``.
        """
        tokens = []
        for w in wl:
            if w in wd:
                tokens.append(str(wd[w][0]))
            else:
                tokens.append('<%s>' % (w, ))
        print(' '.join(tokens))

    P = Phon(Words())
    for a in sys.argv[1:]:
        pron(words(a), P.W)

    print(P.word['there'])
    first_there = P.word['there'][0]
    print(P.phon[first_there])

    # Run each sample phrase; only the sound of the last one is kept in x.
    samples = (
        ['making', 'mistake'],
        ['may', 'king', 'mist', 'ache'],
        ['making', 'miss', 'steak'],
    )
    for phrase in samples:
        x = P.phraseSound(phrase)

    from itertools import product
    for f in P.soundsToWords(x):
        print(f)
        #print(list(product(*f)))
Esempio n. 5
0
        if x == y:
            return 0
        damlev = ngd.diff.damlev
        sx, sy = p.phraseSound([x]), p.phraseSound([y])
        if sx == sy and sx:
            # sound the same, e.g. there/their. consider these equal.
            return damlev
        # otherwise, calculate phonic/edit difference
        return max(damlev,
                   min(NGramDiffScore.overlap(sx, sy), abs(len(x) - len(y))))


if __name__ == '__main__':
    # Manual smoke test: load the corpus and build the module-level g, w and p
    # objects, logging progress to stderr at DEBUG level.
    import logging
    import sys

    # Project modules live one directory up, so extend the path before
    # importing them.
    sys.path.append('..')
    from grambin import GramsBin
    from word import Words, NGram3BinWordCounter
    from phon import Phon

    logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)

    logging.debug('loading...')
    word_path = '/home/pizza/proj/spill-chick/data/corpus/google-ngrams/word.bin'
    ngram_path = '/home/pizza/proj/spill-chick/data/corpus/google-ngrams/ngram3.bin'
    g = GramsBin(word_path, ngram_path)
    counter = NGram3BinWordCounter(g.ng)
    w = Words(counter)
    p = Phon(w, g)
    logging.debug('loaded.')