def load_docs(docs_filename, numOfDoc, vocab, model):
    """Load up to ``numOfDoc`` documents, one per line, from ``docs_filename``.

    Each line is wrapped as ``Doc(line, vocab)`` and then initialised with
    ``doc.init_varational_parameters(vocab, model)``.  A progress line is
    printed for every 1000th document (starting with the first), and the
    average ``len(doc)`` over the loaded documents is printed at the end.

    Args:
        docs_filename: Path of the text file to read; one document per line.
        numOfDoc: Maximum number of documents to load.  Also the length of
            the returned list.
        vocab: Passed through to ``Doc`` and ``init_varational_parameters``.
        model: Passed through to ``init_varational_parameters``.

    Returns:
        A list of length ``numOfDoc`` (empty when ``numOfDoc <= 0``).  Slots
        beyond the number of lines actually present in the file stay ``None``.
    """
    # NOTE(review): another definition of load_docs appears later in this
    # file and would shadow this one at import time -- confirm which to keep.
    docs = [None] * numOfDoc
    loaded = 0
    total_len = 0

    # `with` guarantees the file is closed even if Doc construction raises.
    with open(docs_filename) as doc_file:
        for line in doc_file:
            # Check the limit *before* storing so that numOfDoc <= 0 cannot
            # index past the end of `docs`.
            if loaded >= numOfDoc:
                break

            doc = Doc(line, vocab)
            doc.init_varational_parameters(vocab, model)
            total_len += len(doc)
            docs[loaded] = doc

            if loaded % 1000 == 0:
                # ru_maxrss units are platform dependent; `//` keeps the
                # integer result regardless of the interpreter's `/` rules.
                peak_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss // 1000
                print("progress: %s memoery useage: %s time: %s"
                      % (loaded, peak_rss, datetime.now()))
            loaded += 1

    # Nothing loaded (empty file or numOfDoc <= 0): report 0.0 rather than
    # dividing by zero.
    average_len = float(total_len) / loaded if loaded else 0.0
    print("ave length of doc: %s" % average_len)
    return docs
def load_docs(docs_filename, numOfDoc, vocab, model):
    """Read at most ``numOfDoc`` lines of ``docs_filename`` into ``Doc`` objects.

    For every line read, a ``Doc(line, vocab)`` is created and
    ``init_varational_parameters(vocab, model)`` is called on it.  Progress
    is printed whenever the running index is a multiple of 1000, and the
    mean ``len(doc)`` of the loaded documents is printed before returning.

    Args:
        docs_filename: Path of the input file; each line is one document.
        numOfDoc: Upper bound on the number of documents to load, and the
            length of the returned list.
        vocab: Forwarded to ``Doc`` and ``init_varational_parameters``.
        model: Forwarded to ``init_varational_parameters``.

    Returns:
        A list with ``numOfDoc`` entries (empty when ``numOfDoc <= 0``).  If
        the file has fewer lines than ``numOfDoc``, the remaining entries
        are ``None``.
    """
    # NOTE(review): an earlier definition of load_docs exists in this file;
    # this one shadows it at import time -- confirm the duplicate is intended.
    docs = [None] * numOfDoc
    count = 0
    length_total = 0

    # Context manager closes the file on every exit path.
    with open(docs_filename) as source:
        for raw_line in source:
            # Test the bound first so a non-positive numOfDoc never writes
            # into the (empty) result list.
            if count >= numOfDoc:
                break

            document = Doc(raw_line, vocab)
            document.init_varational_parameters(vocab, model)
            length_total += len(document)
            docs[count] = document

            if count % 1000 == 0:
                usage = resource.getrusage(resource.RUSAGE_SELF)
                # Floor division keeps the integer result under any
                # interpreter; ru_maxrss units vary by platform.
                print("progress: %s memoery useage: %s time: %s"
                      % (count, usage.ru_maxrss // 1000, datetime.now()))
            count += 1

    # Guard the average: with no documents loaded the original expression
    # would divide by zero.
    if count:
        mean_length = float(length_total) / count
    else:
        mean_length = 0.0
    print("ave length of doc: %s" % mean_length)
    return docs