#----------------------------
# ============================= #
# obtain wiki article word hash #
# ============================= #
# Walk the current working directory and, for every entry whose name
# contains "text.", build a word-frequency profile limited to the words
# that are keys of dictHash.  Each profile is stored in a per-file dict
# (fileHash) under '_wordprofile', alongside the file name under '_title'.
wikiFileList = os.listdir('.')
wikiHash = {}
Idx_processedFile = 0
for Idx_wikiFile, fileName in enumerate(wikiFileList):
    if re.search(r"text\.", fileName):
        fileHash = {}
        print("now processing : %s" % (fileName))
        # Drop a leftover '.np' file from an earlier run before preprocessing
        # (presumably so TEXT.PREPROCESS regenerates it -- confirm in TEXT).
        if os.path.isfile(fileName + '.np'):
            os.remove(fileName + '.np')
        fileName_preproc = TEXT.PREPROCESS(fileName) # .np and .np.sort are generated
        # The context manager closes the handle even if read() raises.
        with open(fileName_preproc) as fileObject:
            wordList = fileObject.read().splitlines()
        wordHash = {}
        # Bind 'word' up front so the cleanup below cannot raise NameError
        # when the preprocessed file contains no lines.
        word = None
        for word in wordList:
            if word in dictHash: # to ensure only popular words / english are included
                wordHash[word] = wordHash.get(word, 0) + 1
        #-------
        del word
        #-------
        fileHash['_wordprofile'] = wordHash
        fileHash['_title'] = fileName