Esempio n. 1
0
 def ModelInit(self, filename):
     Docs = LoadData.LoadDataFromFile(os.getcwd() + "/" + filename)
     self.D = len(Docs)
     print "Load ", self.D, " docs from the file"
     StopWordList = LoadData.LoadStopWords()
     WordListSet = [
         Preprocess.PreprocessText(doc, StopWordList) for doc in Docs
         if type(doc) != unicode
     ]
     self.Dictionary = Preprocess.ConstructDictionary(WordListSet)
     self.W = len(self.Dictionary)
     print "Total number of words is: ", self.W
     print "Begin to save the dictionary..."
     self.SaveDictionary()
     print "Done!!"
     print "Begin to map the word to ID"
     self.IDListSet = []
     inv_dict = {v: k for k, v in self.Dictionary.iteritems()}
     for wdl in WordListSet:
         IdList = Preprocess.Word2Id(wdl, inv_dict)
         self.IDListSet.append(IdList)
     print "Done!!"
     self.ndsum = ListUtil.Initial(self.D)
     self.theta = ListUtil.InitialMat(self.D, self.K, 0.0)
     self.phi = ListUtil.InitialMat(self.K, self.W, 0.0)
     self.nd = ListUtil.InitialMat(self.D, self.K, 0)
     self.nw = ListUtil.InitialMat(self.W, self.K, 0)
     self.Z = []
     print "Begin to initialize the LDA model..."
     self.RandomAssignTopic()
     print "Topic assignment done!!"