def test_vstack(self): A = coo_matrix([[1, 2], [3, 4]]) B = coo_matrix([[5, 6]]) expected = matrix([[1, 2], [3, 4], [5, 6]]) assert_equal(construct.vstack([A, B]).todense(), expected) assert_equal(construct.vstack([A, B], dtype=np.float32).dtype, np.float32) assert_equal(construct.vstack([A.tocsr(), B.tocsr()]).todense(), expected) assert_equal(construct.vstack([A.tocsr(), B.tocsr()], dtype=np.float32).dtype, np.float32)
def test_vstack(self): A = coo_matrix([[1,2],[3,4]]) B = coo_matrix([[5,6]]) expected = matrix([[1, 2], [3, 4], [5, 6]]) assert_equal(construct.vstack([A,B]).todense(), expected) assert_equal(construct.vstack([A,B], dtype=np.float32).dtype, np.float32) assert_equal(construct.vstack([A.tocsr(),B.tocsr()]).todense(), expected) assert_equal(construct.vstack([A.tocsr(),B.tocsr()], dtype=np.float32).dtype, np.float32)
def test_vstack(self): A = coo_matrix([[1, 2], [3, 4]]) B = coo_matrix([[5, 6]]) expected = matrix([[1, 2], [3, 4], [5, 6]]) assert_equal(construct.vstack([A, B]).todense(), expected)
def test_vstack(self): A = coo_matrix([[1,2],[3,4]]) B = coo_matrix([[5,6]]) expected = matrix([[1, 2], [3, 4], [5, 6]]) assert_equal(construct.vstack([A,B]).todense(), expected)
def test_vstack(self): A = coo_matrix([[1,2],[3,4]]) B = coo_matrix([[5,6]]) expected = array([[1, 2], [3, 4], [5, 6]]) assert_equal(construct.vstack([A, B]).toarray(), expected) assert_equal(construct.vstack([A,B], dtype=np.float32).dtype, np.float32) assert_equal(construct.vstack([A.tocsr(), B.tocsr()]).toarray(), expected) assert_equal(construct.vstack([A.tocsr(),B.tocsr()], dtype=np.float32).dtype, np.float32) assert_equal(construct.vstack([A.tocsr(),B.tocsr()], dtype=np.float32).indices.dtype, np.int32) assert_equal(construct.vstack([A.tocsr(),B.tocsr()], dtype=np.float32).indptr.dtype, np.int32)
# Build TF-IDF features (uni+bi-grams, top 200 terms) and train a
# multinomial Naive Bayes spam/legit classifier.
#
# BUG FIX: the original called vectorizer.fit_transform() separately on each of
# the four text splits, so every split was encoded with a DIFFERENT vocabulary.
# Stacking train_legit_tfid with train_spam_tfid then mixed incompatible
# feature columns, and the test matrices shared no vocabulary with training at
# all. The vectorizer must be fitted ONCE on the training corpus and every
# split encoded with transform() against that single vocabulary.
vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_features=200)
# assumes train_legit / train_spam are sequences of raw document strings
# — TODO confirm against the loading code earlier in this file
vectorizer.fit(list(train_legit) + list(train_spam))

train_legit_tfid = vectorizer.transform(train_legit)
train_spam_tfid = vectorizer.transform(train_spam)
test_legit_tfid = vectorizer.transform(test_legit)
test_spam_tfid = vectorizer.transform(test_spam)

# Per-class top-200 word reports: use throwaway vectorizers so the reporting
# does not disturb the shared vocabulary used for classification.
print('Printing Legit 200 words:',
      TfidfVectorizer(ngram_range=(1, 2), max_features=200)
      .fit(train_legit).get_feature_names())
print('Printing Spam 200 words:',
      TfidfVectorizer(ngram_range=(1, 2), max_features=200)
      .fit(train_spam).get_feature_names())

# Features and labels for training: legit rows first, then spam rows,
# mirrored by build_labels (defined elsewhere in this file).
X_train = vstack((train_legit_tfid, train_spam_tfid))
labels_train = build_labels(train_legit_tfid.shape[0], train_spam_tfid.shape[0])

# Features and labels for testing, same row ordering.
X_test = vstack((test_legit_tfid, test_spam_tfid))
labels_test = build_labels(test_legit_tfid.shape[0], test_spam_tfid.shape[0])

# alpha=0 deliberately disables Laplace smoothing (kept from the original).
# NOTE(review): sklearn warns on alpha=0 and clamps it to a tiny value;
# unseen-word probabilities are effectively zero — confirm this is intended.
classifier = MultinomialNB(alpha=0)
classifier.fit(X_train, labels_train)
print(classifier)

predict_test = classifier.predict(X_test)
accuracy = classifier.score(X_test, labels_test)