def tf_idf_trans_feature_vector():
    """Fit a TF-IDF model over the processed corpus and return the encoded matrix.

    Reads the corpus via ``text_processed()``, fits a ``TfidfVectorizer``
    (English stop words, word-level analyzer), prints the learned vocabulary
    and the dense encoded matrix for inspection, and returns the sparse
    TF-IDF matrix of shape (n_documents, n_features).

    Returns:
        scipy.sparse matrix: TF-IDF weights for the whole corpus.
    """
    # Local import: the original file imported TfidfTransformer at top level;
    # TfidfVectorizer is the class that actually supports the usage below.
    from sklearn.feature_extraction.text import TfidfVectorizer

    token_array = text_processed()
    # NOTE(review): the original computed an unused 80/20 train/test split
    # (split_string_2_data_array(token_array, 0.8)) here; dropped as dead
    # code — restore if that helper has side effects callers rely on.
    print(token_array)

    # BUG FIX: TfidfTransformer takes no stop_words/analyzer kwargs and has
    # no build_analyzer()/get_feature_names*() — those belong to
    # TfidfVectorizer, which tokenizes raw text AND applies TF-IDF weighting.
    vectorizer = TfidfVectorizer(stop_words='english', analyzer="word")
    # tokenize and build vocab
    X = vectorizer.fit_transform(token_array)
    analyze = vectorizer.build_analyzer()
    print(analyze("subject is not the case"))
    # summarize vocabulary (get_feature_names() was removed in sklearn 1.2;
    # get_feature_names_out() is the supported replacement)
    print(vectorizer.get_feature_names_out())
    # summarize encoded vectors
    print(X.toarray())
    return X