Esempio n. 1
0
 def __init__(self):
     """Load the corpus-backed helpers and store them on the instance.

     Sets three attributes, logging progress at debug level along the way:

     * ``self.g`` -- a ``GramsBin`` built from the ``word.bin`` and
       ``ngram3.bin`` corpus files.
     * ``self.w`` -- a ``Words`` built from an ``NGram3BinWordCounter``
       over ``self.g.ng``.
     * ``self.p`` -- a ``Phon`` built from ``self.w`` and ``self.g``.
     """
     # initialize all "global" data
     logger.debug('loading...')
     logger.debug('  corpus...')
     # FIXME: the corpus paths are absolute because that is the easiest way to
     # make this work both from the command line and when invoked from a web
     # app. Perhaps we could set up softlinks in /var/ to make this slightly
     # more respectable.
     self.g = GramsBin(
         '/home/pizza/proj/spill-chick/data/corpus/google-ngrams/word.bin',
         '/home/pizza/proj/spill-chick/data/corpus/google-ngrams/ngram3.bin'
     )
     # The word list is derived from the n-gram data held by the GramsBin.
     self.w = Words(NGram3BinWordCounter(self.g.ng))
     logger.debug('  phon')
     # Phon needs both the word list and the n-gram store, so it is built last.
     self.p = Phon(self.w, self.g)
     logger.debug('done.')
     # sanity-check junk
     """
Esempio n. 2
0
        if x == y:
            return 0
        damlev = ngd.diff.damlev
        sx, sy = p.phraseSound([x]), p.phraseSound([y])
        if sx == sy and sx:
            # sound the same, e.g. there/their. consider these equal.
            return damlev
        # otherwise, calculate phonic/edit difference
        return max(damlev,
                   min(NGramDiffScore.overlap(sx, sy), abs(len(x) - len(y))))


if __name__ == '__main__':
    # Manual smoke run: load the corpus-backed helpers with debug logging
    # enabled so the load can be observed from the command line.
    import logging
    import sys

    # Make modules in the parent of the working directory importable
    # (presumably where grambin, word and phon live).
    sys.path.append('..')
    from grambin import GramsBin
    from word import Words, NGram3BinWordCounter
    from phon import Phon

    logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
    logging.debug('loading...')

    # Same absolute corpus locations as before, just named for readability.
    word_bin = '/home/pizza/proj/spill-chick/data/corpus/google-ngrams/word.bin'
    ngram3_bin = '/home/pizza/proj/spill-chick/data/corpus/google-ngrams/ngram3.bin'

    # g, w and p stay module-level names so other code in this module that
    # refers to them as globals keeps working.
    g = GramsBin(word_bin, ngram3_bin)
    word_counter = NGram3BinWordCounter(g.ng)
    w = Words(word_counter)
    p = Phon(w, g)

    logging.debug('loaded.')