Exemple #1
0
    def test_vstack(self):

        A = coo_matrix([[1, 2], [3, 4]])
        B = coo_matrix([[5, 6]])

        expected = matrix([[1, 2], [3, 4], [5, 6]])
        assert_equal(construct.vstack([A, B]).todense(), expected)
        assert_equal(construct.vstack([A, B], dtype=np.float32).dtype, np.float32)
        assert_equal(construct.vstack([A.tocsr(), B.tocsr()]).todense(), expected)
        assert_equal(construct.vstack([A.tocsr(), B.tocsr()], dtype=np.float32).dtype, np.float32)
Exemple #2
0
    def test_vstack(self):

        A = coo_matrix([[1,2],[3,4]])
        B = coo_matrix([[5,6]])

        expected = matrix([[1, 2],
                           [3, 4],
                           [5, 6]])
        assert_equal(construct.vstack([A,B]).todense(), expected)
        assert_equal(construct.vstack([A,B], dtype=np.float32).dtype, np.float32)
        assert_equal(construct.vstack([A.tocsr(),B.tocsr()]).todense(),
                     expected)
        assert_equal(construct.vstack([A.tocsr(),B.tocsr()], dtype=np.float32).dtype,
                     np.float32)
Exemple #3
0
    def test_vstack(self):

        A = coo_matrix([[1, 2], [3, 4]])
        B = coo_matrix([[5, 6]])

        expected = matrix([[1, 2], [3, 4], [5, 6]])
        assert_equal(construct.vstack([A, B]).todense(), expected)
    def test_vstack(self):

        A = coo_matrix([[1,2],[3,4]])
        B = coo_matrix([[5,6]])

        expected = matrix([[1, 2],
                           [3, 4],
                           [5, 6]])
        assert_equal(construct.vstack([A,B]).todense(), expected)
Exemple #5
0
    def test_vstack(self):

        A = coo_matrix([[1,2],[3,4]])
        B = coo_matrix([[5,6]])

        expected = array([[1, 2],
                          [3, 4],
                          [5, 6]])
        assert_equal(construct.vstack([A, B]).toarray(), expected)
        assert_equal(construct.vstack([A,B], dtype=np.float32).dtype, np.float32)
        assert_equal(construct.vstack([A.tocsr(), B.tocsr()]).toarray(),
                     expected)
        assert_equal(construct.vstack([A.tocsr(),B.tocsr()], dtype=np.float32).dtype,
                     np.float32)
        assert_equal(construct.vstack([A.tocsr(),B.tocsr()],
                                      dtype=np.float32).indices.dtype, np.int32)
        assert_equal(construct.vstack([A.tocsr(),B.tocsr()],
                                      dtype=np.float32).indptr.dtype, np.int32)
Exemple #6
0
def _feature_names(fitted_vectorizer):
    """Return the vocabulary terms of a fitted vectorizer as a plain list."""
    # Newer scikit-learn releases replaced get_feature_names() with
    # get_feature_names_out(); support both so the report keeps working.
    if hasattr(fitted_vectorizer, 'get_feature_names_out'):
        return fitted_vectorizer.get_feature_names_out().tolist()
    return fitted_vectorizer.get_feature_names()


# TF-IDF over unigrams and bigrams, keeping at most 200 features.
vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_features=200)

# Report the (at most) 200 terms retained for each class. These per-class
# fits are used ONLY for the printout below, never for the feature matrices.
vectorizer.fit(train_legit)
print('Printing Legit 200 words:', _feature_names(vectorizer))

vectorizer.fit(train_spam)
print('Printing Spam 200 words:', _feature_names(vectorizer))

# Fit ONE vocabulary on the whole training corpus and reuse it everywhere.
# Calling fit_transform() separately on each subset would give every matrix
# its own vocabulary and IDF weights, so column j would stand for a different
# term in each matrix and stacking them would mix unrelated features. The
# test documents are only transformed, never fitted on, so nothing from the
# test set leaks into the features.
vectorizer.fit(list(train_legit) + list(train_spam))
train_legit_tfid = vectorizer.transform(train_legit)
train_spam_tfid = vectorizer.transform(train_spam)
test_legit_tfid = vectorizer.transform(test_legit)
test_spam_tfid = vectorizer.transform(test_spam)

# Features and labels for training: legit rows first, then spam rows.
# presumably build_labels(n_legit, n_spam) returns one label per row in that
# same order -- verify against its definition.
X_train = vstack((train_legit_tfid, train_spam_tfid))
labels_train = build_labels(train_legit_tfid.shape[0],
                            train_spam_tfid.shape[0])

# Features and labels for testing, stacked in the same legit-then-spam order.
X_test = vstack((test_legit_tfid, test_spam_tfid))
labels_test = build_labels(test_legit_tfid.shape[0], test_spam_tfid.shape[0])

# Multinomial Naive Bayes with alpha=0, i.e. no Laplace smoothing requested.
# NOTE(review): some scikit-learn versions warn and clip alpha=0 to a tiny
# positive value -- confirm the behaviour on the installed version.
classifier = MultinomialNB(alpha=0)
classifier.fit(X_train, labels_train)
print(classifier)

# Predicted label for every test document.
predict_test = classifier.predict(X_test)

# Mean accuracy of the classifier on the test set.
accuracy = classifier.score(X_test, labels_test)