예제 #1
0
def loadPickledFiles():
    global g_wnWords, g_wnWordIndex, g_wnDictPath, g_wnIndexPath, g_wnWordsPath
    print "loading wn dictionary data files"

    if not arsutils.fFileExists(g_wnDictPath):
        print "WordNet dictionary file '%s' doesn't exist" % g_wnDictPath
        return False

    if not arsutils.fFileExists(g_wnIndexPath):
        print "WordNet dictionary file '%s' doesn't exist" % g_wnIndexPath
        return False

    if not arsutils.fFileExists(g_wnWordsPath):
        print "WordNet dictionary file '%s' doesn't exist" % g_wnWordsPath
        return False

    try:
        fo = open(g_wnIndexPath, "rb")
        g_wnWordIndex = cPickle.load(fo)
        fo.close()

        fo = open(g_wnWordsPath, "rb")
        g_wnWords = cPickle.load(fo)
        fo.close()
    except Exception, ex:
        print arsutils.exceptionAsStr(ex)
        return False
예제 #2
0
def loadPickledFiles():
    global g_wnWords, g_wnWordIndex, g_wnDictPath, g_wnIndexPath, g_wnWordsPath
    print "loading wn dictionary data files"

    if not arsutils.fFileExists(g_wnDictPath):
        print "WordNet dictionary file '%s' doesn't exist" % g_wnDictPath
        return False

    if not arsutils.fFileExists(g_wnIndexPath):
        print "WordNet dictionary file '%s' doesn't exist" % g_wnIndexPath
        return False

    if not arsutils.fFileExists(g_wnWordsPath):
        print "WordNet dictionary file '%s' doesn't exist" % g_wnWordsPath
        return False

    try:
        fo = open(g_wnIndexPath, "rb")
        g_wnWordIndex = cPickle.load(fo)
        fo.close()

        fo = open(g_wnWordsPath, "rb")
        g_wnWords = cPickle.load(fo)
        fo.close()
    except Exception, ex:
        print arsutils.exceptionAsStr(ex)
        return False
예제 #3
0
def doInfoMan(outDir):
    global proSynsets

    ensureDir(outDir)

    # synchronize those file names with dictionary.py
    DICT_FILE = "wn-dict.txt"
    INDEX_FILE = "wn-words-index.pic"
    WORDS_FILE = "wn-words.pic"

    dictPath = os.path.join(outDir, DICT_FILE)
    indexPath = os.path.join(outDir, INDEX_FILE)
    wordsPath = os.path.join(outDir, WORDS_FILE)

    if arsutils.fFileExists(dictPath) and arsutils.fFileExists(
            indexPath) and arsutils.fFileExists(wordsPath):
        print "All files already exist. Nothing to do"
        return

    for fileName in dataFiles:
        print "groking file: %s" % fileName
        grok_one_data_file(fileName)

    if len(unparsed) > 0:
        print "Unparsed:", len(unparsed)
        for i in unparsed:
            print i

    all_words = {}
    print "generating word defs"
    for synset in proSynsets:
        word_def = synset.getTxtMarked()
        for w in synset.words:
            if all_words.has_key(w):
                # TODO: some better sorting of this data?
                all_words[w] = all_words[w] + word_def
            else:
                all_words[w] = word_def

    print "sorting words"
    sortedWords = all_words.keys()
    sortedWords.sort()

    print "words: %d" % len(sortedWords)
    print "writing %s" % dictPath
    dictDataFo = open(dictPath, "wb")
    wordIndex = {}
    curOffset = 0
    for word in sortedWords:
        wordDef = all_words[word]
        defLen = len(wordDef)
        wordIndex[word] = (curOffset, defLen)
        curOffset += defLen
        dictDataFo.write(wordDef)
    dictDataFo.close()

    print "pickling %s" % indexPath
    dictIndexFo = open(indexPath, "wb")
    cPickle.dump(wordIndex, dictIndexFo, protocol=cPickle.HIGHEST_PROTOCOL)
    dictIndexFo.close()

    print "pickling %s" % wordsPath
    dictWordsFo = open(wordsPath, "wb")
    cPickle.dump(sortedWords, dictWordsFo, protocol=cPickle.HIGHEST_PROTOCOL)
    dictWordsFo.close()
예제 #4
0
        fo = open(g_wnIndexPath, "rb")
        g_wnWordIndex = cPickle.load(fo)
        fo.close()

        fo = open(g_wnWordsPath, "rb")
        g_wnWords = cPickle.load(fo)
        fo.close()
    except Exception, ex:
        print arsutils.exceptionAsStr(ex)
        return False
    print "Finished loading WordNet files"

    global g_thWords, g_thWordIndex, g_thDictPath, g_thIndexPath, g_thWordsPath
    print "loading th dictionary data files"

    if not arsutils.fFileExists(g_thDictPath):
        print "Thesaurus dictionary file '%s' doesn't exist" % g_thDictPath
        return False

    if not arsutils.fFileExists(g_thIndexPath):
        print "Thesaurus dictionary file '%s' doesn't exist" % g_thIndexPath
        return False

    if not arsutils.fFileExists(g_thWordsPath):
        print "Thesaurus dictionary file '%s' doesn't exist" % g_thWordsPath
        return False

    try:
        fo = open(g_thIndexPath, "rb")
        g_thWordIndex = cPickle.load(fo)
        fo.close()
예제 #5
0
def doInfoMan(outDir):
    global proSynsets

    ensureDir(outDir)

    # synchronize those file names with dictionary.py
    DICT_FILE   = "wn-dict.txt"
    INDEX_FILE  = "wn-words-index.pic"
    WORDS_FILE  = "wn-words.pic"

    dictPath  = os.path.join(outDir, DICT_FILE)
    indexPath = os.path.join(outDir, INDEX_FILE)
    wordsPath = os.path.join(outDir, WORDS_FILE)

    if arsutils.fFileExists(dictPath) and arsutils.fFileExists(indexPath) and arsutils.fFileExists(wordsPath):
        print "All files already exist. Nothing to do"
        return

    for fileName in dataFiles:
        print "groking file: %s" % fileName
        grok_one_data_file(fileName)

    if len(unparsed) > 0:
        print "Unparsed:", len(unparsed)
        for i in unparsed:
            print i

    all_words = {}
    print "generating word defs"
    for synset in proSynsets:
        word_def = synset.getTxtMarked()
        for w in synset.words:
            if all_words.has_key(w):
                # TODO: some better sorting of this data?
                all_words[w] = all_words[w] + word_def
            else:
                all_words[w] = word_def

    print "sorting words"
    sortedWords = all_words.keys()
    sortedWords.sort()

    print "words: %d" % len(sortedWords)
    print "writing %s" % dictPath
    dictDataFo = open(dictPath, "wb")
    wordIndex = {}
    curOffset = 0
    for word in sortedWords:
        wordDef = all_words[word]
        defLen = len(wordDef)
        wordIndex[word] = (curOffset, defLen)
        curOffset += defLen
        dictDataFo.write(wordDef)
    dictDataFo.close()

    print "pickling %s" % indexPath
    dictIndexFo = open(indexPath, "wb")
    cPickle.dump(wordIndex, dictIndexFo, protocol=cPickle.HIGHEST_PROTOCOL)
    dictIndexFo.close()

    print "pickling %s" % wordsPath
    dictWordsFo = open(wordsPath, "wb")
    cPickle.dump(sortedWords, dictWordsFo, protocol=cPickle.HIGHEST_PROTOCOL)
    dictWordsFo.close()
예제 #6
0
def fCacheExists(sqlDumpFileName):
    """Return True only if both the body file and the index file derived
    from sqlDumpFileName exist; otherwise return False."""
    bodyPath = getBodyFileName(sqlDumpFileName)
    indexPath = getIdxFileName(sqlDumpFileName)
    # Guard clauses: the index file is only probed when the body file exists.
    if not arsutils.fFileExists(bodyPath):
        return False
    if not arsutils.fFileExists(indexPath):
        return False
    return True
예제 #7
0
        fo = open(g_wnIndexPath, "rb")
        g_wnWordIndex = cPickle.load(fo)
        fo.close()

        fo = open(g_wnWordsPath, "rb")
        g_wnWords = cPickle.load(fo)
        fo.close()
    except Exception, ex:
        print arsutils.exceptionAsStr(ex)
        return False
    print "Finished loading WordNet files"

    global g_thWords, g_thWordIndex, g_thDictPath, g_thIndexPath, g_thWordsPath
    print "loading th dictionary data files"

    if not arsutils.fFileExists(g_thDictPath):
        print "Thesaurus dictionary file '%s' doesn't exist" % g_thDictPath
        return False

    if not arsutils.fFileExists(g_thIndexPath):
        print "Thesaurus dictionary file '%s' doesn't exist" % g_thIndexPath
        return False

    if not arsutils.fFileExists(g_thWordsPath):
        print "Thesaurus dictionary file '%s' doesn't exist" % g_thWordsPath
        return False

    try:
        fo = open(g_thIndexPath, "rb")
        g_thWordIndex = cPickle.load(fo)
        fo.close()
예제 #8
0
def fCacheExists(sqlDumpFileName):
    """Tell whether the cache for sqlDumpFileName is complete.

    The cache counts as present when both the file named by
    getBodyFileName() and the file named by getIdxFileName() exist.
    """
    cacheFiles = (
        getBodyFileName(sqlDumpFileName),
        getIdxFileName(sqlDumpFileName),
    )
    # Stop at the first missing file, body file first.
    for cachePath in cacheFiles:
        if not arsutils.fFileExists(cachePath):
            return False
    return True