def save_binary(words, filename, parsed_texts):
    """Vectorize texts into a word-presence matrix and pass it to ``p_save``.

    Args:
        words: Fixed vocabulary handed to ``CountVectorizer(vocabulary=...)``.
        filename: Destination argument forwarded unchanged to ``p_save``.
        parsed_texts: Documents to vectorize (presumably an iterable of
            strings -- TODO confirm against the caller).

    Returns:
        None. The resulting array is printed and handed to ``p_save``.
    """
    print("Saving to binary file.")
    vec = CountVectorizer(analyzer="word", vocabulary=words)
    counts = vec.fit_transform(parsed_texts)
    # np.sign collapses every positive term count to 1, leaving zeros as-is,
    # so the dense array records presence/absence rather than frequency.
    features_arr = np.sign(counts.toarray())
    # The previous version stepped through up to 200 rows with a hand-rolled
    # counter but never used the row itself; it only ever printed the whole
    # array. Print it once instead.
    print(features_arr)
    p_save(features_arr, filename)
#for i in range(NUM_FEATURES):
#    print(' ' + cv.get_feature_names()[music[i]]);
#print('Interview: ');
#for i in range(NUM_FEATURES):
#    print(' ' + cv.get_feature_names()[interview[i]]);

# Sum up the mutual information for all classes
# (assumes `mi` has one row per class and one column per feature -- TODO confirm).
miSum = mi.sum(0)
# Negating the sums makes argsort yield indices in descending order of MI.
sortedIndices = np.argsort(-np.array(miSum[0])[0])
# Fetch the vocabulary once; calling get_feature_names() inside the loop
# rebuilt the full name list on every iteration.
# NOTE(review): newer scikit-learn releases replace get_feature_names() with
# get_feature_names_out() -- confirm which version this project pins.
feature_names = cv.get_feature_names()
# Keep the names of the NUM_FEATURES highest-scoring features. Slicing yields
# fewer names (instead of raising IndexError) if the vocabulary is smaller.
features = [feature_names[idx] for idx in sortedIndices[:NUM_FEATURES]]
p_save(features, "mi_features.dat")

print("Loading dataset.")
# The context manager closes the file even if reading raises part-way through.
# NOTE(review): the csv docs recommend opening with newline=''; left as-is so
# parsed field values stay identical to the previous behavior.
with open('data/ml_dataset_train.csv', "r") as ifile:
    reader = csv.reader(ifile)
    next(reader, None)  # skip the header row; harmless if the file is empty
    data = list(reader)