def generateTfidf(dataset_train_filename):
    """Yield ``(stars, Counter)`` pairs of TF-IDF transformed examples.

    Each ``(tokens, stars)`` pair produced by
    ``generateYelpExample(dataset_train_filename)`` is converted to a
    bag-of-words with ``dictionary_train.doc2bow`` and then passed through
    ``tfidf_model``. Examples whose transformed result is empty are skipped.

    The yielded ``Counter`` is built from ``dict(...)`` of the transformed
    result; presumably it maps token ids to TF-IDF weights -- confirm against
    the definitions of ``dictionary_train`` and ``tfidf_model``.
    """
    for tokens, stars in generateYelpExample(dataset_train_filename):
        bag_of_words = dictionary_train.doc2bow(tokens)
        weighted = tfidf_model[bag_of_words]
        # Only emit examples that still have at least one entry.
        if len(weighted) > 0:
            yield stars, Counter(dict(weighted))
def generateWordFreq(dataset_train_filename):
    """Yield ``(stars, Counter)`` pairs of normalised bag-of-words counts.

    Each ``(tokens, stars)`` pair produced by
    ``generateYelpExample(dataset_train_filename)`` is converted to a
    bag-of-words with ``dictionary_train.doc2bow``. Every count is divided by
    the sum of all counts for that example, so the yielded values add up to
    1.0. Examples with an empty bag-of-words are skipped.
    """
    for tokens, stars in generateYelpExample(dataset_train_filename):
        counts = dict(dictionary_train.doc2bow(tokens))
        if not counts:
            continue

        # float() keeps the division non-truncating whatever the count type.
        total = float(sum(counts.values()))
        relative = Counter(
            {key: count / total for key, count in counts.items()}
        )
        yield stars, relative