def load_docs(docs_filename, numOfDoc, vocab, model):
    """Load at most ``numOfDoc`` documents from a text file, one per line.

    Every line read is turned into ``Doc(line, vocab)`` and then initialised
    with ``doc.init_varational_parameters(vocab, model)``.  A progress line
    (document index, ``ru_maxrss`` from ``resource.getrusage`` divided by
    1000, current time) is printed for every 1000th document, and the average
    ``len(doc)`` is printed once loading finishes.

    Args:
        docs_filename: Path of the file to read.
        numOfDoc: Maximum number of lines to load; also the length of the
            returned list.
        vocab: Forwarded to ``Doc`` and ``init_varational_parameters``.
        model: Forwarded to ``init_varational_parameters``.

    Returns:
        A list of length ``numOfDoc`` (empty when ``numOfDoc <= 0``).  When
        the file has fewer than ``numOfDoc`` lines the unused trailing slots
        stay ``None``.
    """
    # Nothing to load: avoids indexing into an empty list on the first line.
    if numOfDoc <= 0:
        return []

    docs = [None] * numOfDoc
    cnt = 0
    len_sum = 0
    # ``with`` guarantees the file is closed, including on the early break
    # and when Doc construction raises.
    with open(docs_filename) as doc_file:
        for line in doc_file:
            doc = Doc(line, vocab)
            doc.init_varational_parameters(vocab, model)
            len_sum += len(doc)
            docs[cnt] = doc
            if cnt % 1000 == 0:
                # ``//`` keeps the integer result that ``/`` yields on ints
                # under Python 2.  NOTE(review): ru_maxrss units are
                # platform dependent -- confirm what the figure means here.
                peak = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss // 1000
                # A single pre-formatted string prints the same text under
                # both the print statement and the print function.
                print("progress: %s memoery useage: %s time: %s"
                      % (cnt, peak, datetime.now()))
            cnt += 1
            if cnt >= numOfDoc:
                break

    # An empty file leaves cnt at 0; report 0.0 instead of dividing by zero.
    ave_len = float(len_sum) / cnt if cnt else 0.0
    print("ave length of doc: %s" % ave_len)
    return docs
Exemple #2
0
def load_docs(docs_filename, numOfDoc, vocab, model):
    """Read up to ``numOfDoc`` lines of ``docs_filename`` into ``Doc`` objects.

    Each line becomes ``Doc(line, vocab)`` followed by a call to
    ``doc.init_varational_parameters(vocab, model)``.  For every 1000th
    document a progress line is printed (index, ``ru_maxrss`` from
    ``resource.getrusage`` divided by 1000, current time); the mean
    ``len(doc)`` is printed at the end.

    Args:
        docs_filename: Path of the file to read.
        numOfDoc: Upper bound on the number of lines loaded, and the length
            of the returned list.
        vocab: Passed to ``Doc`` and to ``init_varational_parameters``.
        model: Passed to ``init_varational_parameters``.

    Returns:
        A list with ``numOfDoc`` entries (empty when ``numOfDoc <= 0``).
        Slots beyond the number of lines actually present in the file
        remain ``None``.
    """
    # Guard: with no capacity the first ``docs[cnt] = doc`` would raise.
    if numOfDoc <= 0:
        return []

    docs = [None] * numOfDoc
    cnt = 0
    len_sum = 0
    # Context manager closes the handle on break and on exceptions alike.
    with open(docs_filename) as doc_file:
        for line in doc_file:
            doc = Doc(line, vocab)
            doc.init_varational_parameters(vocab, model)
            len_sum += len(doc)
            docs[cnt] = doc
            if cnt % 1000 == 0:
                # ``//`` makes the integer division explicit (what ``/`` does
                # on ints under Python 2).  NOTE(review): ru_maxrss units
                # differ between platforms -- confirm the intended scale.
                peak = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss // 1000
                # One formatted string so the output is identical whether
                # ``print`` is a statement or a function.
                print("progress: %s memoery useage: %s time: %s"
                      % (cnt, peak, datetime.now()))
            cnt += 1
            if cnt >= numOfDoc:
                break

    # cnt stays 0 for an empty file; avoid ZeroDivisionError.
    ave_len = float(len_sum) / cnt if cnt else 0.0
    print("ave length of doc: %s" % ave_len)
    return docs