def __init__(self):
    """Create the stemmer and load (or rebuild) the token dictionary.

    The dictionary is cached as a pickle at ``<cwd>/data/tokensDict.p``.
    When that cache cannot be opened or unpickled, the dictionary is
    rebuilt with ``scanCleanDir()`` and written back to the same path.
    """
    self.stemmer = PorterStemmer()
    cache_path = os.path.join(os.getcwd(), "data", "tokensDict.p")
    try:
        # Pickle data is a byte stream, so the file is opened in binary
        # mode; ``with`` closes the handle even when loading fails.
        # NOTE(review): unpickling can execute arbitrary code -- this
        # assumes the cache file is locally generated and trusted.
        with open(cache_path, "rb") as cache_file:
            self.tokens = pickle.load(cache_file)
    except Exception:
        # Any failure to read the cache (missing, truncated or corrupt
        # file) falls back to a rebuild. Unlike a bare ``except:``, this
        # lets KeyboardInterrupt and SystemExit propagate.
        # The parenthesised single-string form prints the same text under
        # both Python 2 and Python 3.
        print("Pickle file not found")
        print("Creating the Dirctionary")
        self.tokens = scanCleanDir()
        with open(cache_path, "wb") as cache_file:
            pickle.dump(self.tokens, cache_file)
def __init__(self):
    """Create the stemmer and load (or rebuild) the token meta-data.

    ``self.tokens`` is a dictionary containing all the meta-data for all
    of the tokens and documents. It has the structure::

        {token: {document_number: [positions of the token in that
                                   document]}}

    The dictionary is cached as a pickle at ``<cwd>/data/tokensDict.p``.
    When that cache cannot be opened or unpickled, the dictionary is
    rebuilt with ``scanCleanDir()`` and written back to the same path.
    """
    self.stemmer = PorterStemmer()
    cache_path = os.path.join(os.getcwd(), "data", "tokensDict.p")
    try:
        # Pickle data is a byte stream, so the file is opened in binary
        # mode; ``with`` closes the handle even when loading fails.
        # NOTE(review): unpickling can execute arbitrary code -- this
        # assumes the cache file is locally generated and trusted.
        with open(cache_path, "rb") as cache_file:
            self.tokens = pickle.load(cache_file)
    except Exception:
        # Any failure to read the cache (missing, truncated or corrupt
        # file) falls back to a rebuild. Unlike a bare ``except:``, this
        # lets KeyboardInterrupt and SystemExit propagate.
        # The parenthesised single-string form prints the same text under
        # both Python 2 and Python 3.
        print("Pickle file not found")
        print("Creating the dictionary of meta-data.")
        self.tokens = scanCleanDir()
        with open(cache_path, "wb") as cache_file:
            pickle.dump(self.tokens, cache_file)