예제 #1
0
파일: mi.py 프로젝트: campbelljc/598p2
def save_binary(words, filename, parsed_texts):  # , predictions):
    """Build a binary word-presence matrix and persist it via ``p_save``.

    A ``CountVectorizer`` restricted to the fixed vocabulary ``words`` counts
    the occurrences of each vocabulary entry in ``parsed_texts``. The counts
    are converted to a dense array and reduced to 0/1 indicators with
    ``np.sign``. The resulting array is printed and then passed to ``p_save``
    together with ``filename``.

    Args:
        words: Vocabulary handed to ``CountVectorizer(vocabulary=...)``.
        filename: Destination forwarded unchanged to ``p_save``.
        parsed_texts: Documents handed to ``CountVectorizer.fit_transform``.

    Returns:
        None.
    """
    print("Saving to binary file.")
    vec = CountVectorizer(analyzer="word", vocabulary=words)
    counts = vec.fit_transform(parsed_texts)
    # Counts are non-negative, so sign() maps "present at least once" to 1
    # and "absent" to 0.
    features_arr = np.sign(counts.toarray())

    print(features_arr)
    p_save(features_arr, filename)
예제 #2
0
#for i in range(NUM_FEATURES):
#    print(' ' + cv.get_feature_names()[music[i]]);
#print('Interview: ');
#for i in range(NUM_FEATURES):
#    print(' ' + cv.get_feature_names()[interview[i]]);

# Sum up the mutual information for all classes
miSum = mi.sum(0)
# Negate before argsort (which sorts ascending) so the indices come out
# ordered from the highest summed value to the lowest.
sortedIndices = np.argsort(-np.array(miSum[0])[0])

# Fetch the vocabulary once instead of rebuilding it on every iteration.
featureNames = cv.get_feature_names()
# Keep the names of the NUM_FEATURES top-ranked entries. Indexing by
# position (rather than slicing) still raises if NUM_FEATURES exceeds the
# number of available features.
features = [featureNames[sortedIndices[i]] for i in range(NUM_FEATURES)]

p_save(features, "mi_features.dat")

print("Loading dataset.")

# Load every row of the training CSV except the first one (presumably a
# header row -- confirm against the data file).
# The with-block guarantees the file is closed even if parsing fails midway.
# NOTE(review): the csv docs recommend open(..., newline='') on Python 3;
# not added here because the target Python version is not visible.
with open('data/ml_dataset_train.csv', "r") as ifile:
    reader = csv.reader(ifile)
    # Discard the first row; the None default keeps an empty file from
    # raising StopIteration.
    next(reader, None)
    data = list(reader)