# Dump the term-frequency matrix as text: one line per entry of `tokens`,
# each line being the str() of [values, token].
# NOTE(review): presumably `vectorizer` is a scikit-learn text vectorizer;
# get_feature_names() was removed in scikit-learn 1.2 in favour of
# get_feature_names_out() -- confirm the pinned version before upgrading.
tokens = vectorizer.get_feature_names()
# Densify and transpose so rows can be paired one-to-one with `tokens`
# (assumes the columns of X_train_tf follow the token order -- TODO confirm).
freq = (X_train_tf.toarray().T).tolist()

# Pair every row with its token; zip stops at the shorter of the two inputs.
out = [[values, token] for values, token in zip(freq, tokens)]

# The context manager closes the file even if formatting or writing fails.
with open(trace_name + '.tf', 'w') as f:
    # Entries are written in the order returned by get_feature_names(),
    # i.e. not sorted.
    for item in out:
        f.write(str(item) + '\n')

# Fit a TF-IDF weighting (IDF enabled) on X and keep the transformed matrix.
tfidf_transformer = TfidfTransformer(use_idf=True)
X_train_tfidf = tfidf_transformer.fit_transform(X)

# Dump the TF-IDF matrix as text: one line per entry of `tokens`, each line
# being the str() of [values, token].
# NOTE(review): presumably `vectorizer` is a scikit-learn text vectorizer;
# get_feature_names() was removed in scikit-learn 1.2 in favour of
# get_feature_names_out() -- confirm the pinned version before upgrading.
tokens = vectorizer.get_feature_names()
# Densify and transpose so rows can be paired one-to-one with `tokens`
# (assumes the columns of X_train_tfidf follow the token order -- TODO confirm).
freq = (X_train_tfidf.toarray().T).tolist()

# Pair every row with its token; zip stops at the shorter of the two inputs.
out = [[values, token] for values, token in zip(freq, tokens)]

# The context manager closes the file even if formatting or writing fails.
with open(trace_name + '.tfidf', 'w') as f:
    # Entries are written in the order returned by get_feature_names(),
    # i.e. not sorted.
    for item in out:
        f.write(str(item) + '\n')

print(labels)

# Persist the training artefacts as consecutive pickle records in a single
# file. A reader has to call pickle.load() once per record, in this order:
# X, vectorizer, tf_transformer, tfidf_transformer, labels.
with open(trace_name + '.train', 'wb') as f:
    for artefact in (X, vectorizer, tf_transformer):
        pickle.dump(artefact, f)

    # NOTE(review): scikit-learn documents stop_words_ on its text vectorizers
    # (where clearing it shrinks the pickle), not on TfidfTransformer, so this
    # assignment probably just adds a None attribute -- confirm whether
    # vectorizer.stop_words_ was the intended target.
    tfidf_transformer.stop_words_ = None

    for artefact in (tfidf_transformer, labels):
        pickle.dump(artefact, f)