# Index every file listed in idf.files: build the inverted index, record the
# per-document term frequencies and word locations, update the document
# frequencies, and persist each document through the DB cursor.
# Relies on names defined outside this fragment: idf, index, Document, cur,
# con, i (running document counter) and startTime.
size = len(idf.files)
tf = {}        # docNo -> doc.TF
wordLoc = {}   # docNo -> doc.wordLoc

# Index files
t = 0
for f in idf.files:
    try:
        temp = time.time()
        doc = Document(f)
        index.buildIndex(doc)
        tf[doc.docNo] = doc.TF
        wordLoc[doc.docNo] = doc.wordLoc
        idf.buildDF(doc)
        # NOTE(review): this accumulated per-document time is overwritten by
        # the wall-clock elapsed time below every 10 documents -- confirm
        # which of the two meanings later code expects from `t`.
        t += time.time() - temp
        doc.write2DB(cur)
        i += 1
        if i % 10 == 0:
            # Commit in batches of 300 documents rather than per document.
            if i % 300 == 0:
                con.commit()
            # Floor division keeps the percentage an integer on Python 2 and 3.
            percent = i * 100 // size
            t = time.time() - startTime
            # '\r' rewrites the same terminal line; \033[92m ... \033[0m
            # wraps the numbers in ANSI bright-green.
            sys.stdout.write('\r indexing...\033[92m%2d\033[0m%% \033[92m%2.0f\033[0ms'%(percent,t))
            sys.stdout.flush()
    except Exception:
        # Best effort: report and move on to the next file. Catching
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # propagate, so the run can still be aborted with Ctrl-C.
        # The parenthesised form prints identically on Python 2 and 3.
        print(" some file format is not correct!")

# Flush whatever is left of the last partial batch, then derive IDF from
# the document frequencies collected above.
con.commit()
idf.buildIDF()
#index.seldomWords()
#index.wordsVector()