# Feature names reported by count_vectorizer, one entry per input feature.
# scikit-learn 1.0 deprecated get_feature_names() and 1.2 removed it in favour
# of get_feature_names_out(); prefer the new accessor and fall back to the old
# one so the script runs on both old and current releases.
if hasattr(count_vectorizer, "get_feature_names_out"):
    names = count_vectorizer.get_feature_names_out()
else:
    names = count_vectorizer.get_feature_names()

# get_support - boolean array of shape [# input features],
# in which an element is True iff its corresponding feature is selected for retention
selected_words = np.asarray(names)[select.get_support()]
# print(', '.join(selected_words))


# ************* Make average vectors of w2v representation of top 1000 words ***************************
# Load the previously saved Word2Vec model from disk.
model = gensim.models.Word2Vec.load("300features_40minwords_10context")
# Passed to get_avg_feature_vecs below; presumably the embedding dimensionality
# (the model file name mentions "300features") - confirm against the model.
features_count = 300

# Convert every training review with review_to_wordlist, handing it the
# selected words as its `vocabulary` argument.
train_reviews = [
    review_to_wordlist(text, vocabulary=selected_words)
    for text in train["review"]
]
trainDataVecs = get_avg_feature_vecs(train_reviews, model, features_count)

print("Creating average feature vecs for test reviews")
# Same conversion for the test reviews, using the same vocabulary.
test_reviews = [
    review_to_wordlist(text, vocabulary=selected_words)
    for text in test["review"]
]
testDataVecs = get_avg_feature_vecs(test_reviews, model, features_count)


# ************* Make a prediction ******************************

# NOTE: this rebinds `model`, which until here referred to the Word2Vec model
# loaded above; the word vectors are no longer reachable through this name.
model = LinearRegression()
def prepare_rewiews(reviews):
    """Apply review_to_wordlist to every review with stop-word removal enabled.

    Args:
        reviews: iterable of reviews; each item is passed unchanged as the
            first argument of review_to_wordlist.

    Returns:
        A new list holding the review_to_wordlist result for each input
        review, in input order.
    """
    return [review_to_wordlist(item, remove_stopwords=True) for item in reviews]