# We'll reduce the document vectors to 10 concepts.
# Let's test how our model performs as a classifier.
# A document can have a label (or type, or class).
# For example, in the movie reviews corpus,
# there are positive reviews (score > 0) and negative reviews (score < 0).
# A classifier uses a model as "training" data
# to predict the label (type/class) of unlabeled documents.
# In this case, it can predict whether a new movie review is positive or negative.
# The details are not that important right now, just observe the accuracy.
# Naturally, we want accuracy to stay the same after LSA reduction,
# and hopefully decrease the time needed to run.
t = time.time()
print("accuracy:", KNN.test(m, folds=10)[-1])
print("time:", time.time() - t)
print()

# Reduce the documents to vectors of 10 concepts (= 1/4 of 40 features).
print("LSA reduction...")
print()
m.reduce(10)

# Re-run the same 10-fold cross-validation on the reduced model.
t = time.time()
print("accuracy:", KNN.test(m, folds=10)[-1])
print("time:", time.time() - t)
print()

# Accuracy is about the same, but the performance is better: 2x-3x faster,
# because each document is now a "10-word summary" of the original review.
# We'll reduce the document vectors to 10 concepts.
# Let's test how our model performs as a classifier.
# A document can have a label (or type, or class).
# For example, in the movie reviews corpus,
# there are positive reviews (score > 0) and negative reviews (score < 0).
# A classifier uses a model as "training" data
# to predict the label (type/class) of unlabeled documents.
# In this case, it can predict whether a new movie review is positive or negative.
# The details are not that important right now, just observe the accuracy.
# Naturally, we want accuracy to stay the same after LSA reduction,
# and hopefully decrease the time needed to run.

def _report_knn_accuracy():
    # Print 10-fold cross-validated KNN accuracy on model m,
    # followed by the wall-clock time the evaluation took.
    started = time.time()
    print("accuracy:", KNN.test(m, folds=10)[-1])
    print("time:", time.time() - started)
    print()

_report_knn_accuracy()

# Reduce the documents to vectors of 10 concepts (= 1/4 of 40 features).
print("LSA reduction...")
print()
m.reduce(10)

# Same evaluation on the reduced model, for comparison.
_report_knn_accuracy()

# Accuracy is about the same, but the performance is better: 2x-3x faster,
# because each document is now a "10-word summary" of the original review.
# Let's test how our model performs as a classifier.
# A document can have a label (or type, or class).
# For example, in the movie reviews corpus,
# there are positive reviews (score > 0) and negative reviews (score < 0).
# A classifier uses a model as "training" data
# to predict the label (type/class) of unlabeled documents.
# In this case, it can predict whether a new movie review is positive or
# negative.
# The details are not that important right now, just observe the accuracy.
# Naturally, we want accuracy to stay the same after LSA reduction,
# and hopefully decrease the time needed to run.
clock = time.time()
score = KNN.test(m, folds=10)[-1]
print("accuracy:", score)
print("time:", time.time() - clock)
print()

# Reduce the documents to vectors of 10 concepts (= 1/4 of 40 features).
print("LSA reduction...")
print()
m.reduce(10)

# Re-evaluate on the reduced vectors.
clock = time.time()
score = KNN.test(m, folds=10)[-1]
print("accuracy:", score)
print("time:", time.time() - clock)
print()

# Accuracy is about the same, but the performance is better: 2x-3x faster,
# because each document is now a "10-word summary" of the original review.
# Debugging/benchmark scratch script: dumps corpus statistics, times a 10-fold
# KNN cross-validation, then times information-gain feature selection and
# filters the corpus down to the top 150 features.
from time import time

x += 1
print("ERROR")
# NOTE(review): under Python 2 this floor-divided when x and n were both ints;
# under Python 3 it is true division — confirm which result is intended.
print(x / n)
print(t1)
print(t2)
#print(xxx)

# Corpus statistics: document count, feature count, vector length of doc 0.
print(len(corpus))
print(len(corpus.features))
print(len(corpus.documents[0].vector))

# Time a 10-fold cross-validation of the KNN classifier.
t = time()
print(KNN.test(corpus, folds=10))
print(time() - t)

# Time feature selection and keep only the 150 best features.
print("filter...")
t = time()
f = corpus.feature_selection(150, verbose=False)
print(f)
print(time() - t)
corpus = corpus.filter(f)
#corpus.reduce(300)
#print(len(corpus.lsa.vectors[corpus.documents[0].id]))
#print(corpus.lsa.vectors[corpus.documents[0].id])
#print(len(corpus))
# Build a corpus from the documents and print basic statistics.
corpus = Corpus(documents)
print("number of documents:", len(corpus))
print("number of words:", len(corpus.vector))
# float() kept so the average is true division on any Python version.
print("number of words (average):", sum(len(d.terms) for d in corpus.documents) / float(len(corpus)))
print()

# This may be too much words for some clustering algorithms (e.g., hierarchical).
# We'll reduce the documents to vectors of 4 concepts.
# First, let's test how the corpus would perform as a classifier.
# The details of KNN are not that important right now, just observe the numbers.
# Naturally, we want accuracy to stay the same after LSA reduction,
# and hopefully decrease the time needed to run.
t = time.time()
print("accuracy:", KNN.test(corpus, folds=10)[-1])
print("time:", time.time() - t)
print()

# Reduce the documents to vectors of 4 concepts (= 1/7 of 30 words).
print("LSA reduction...")
print()
corpus.reduce(4)

# Re-run the same cross-validation on the reduced corpus.
t = time.time()
print("accuracy:", KNN.test(corpus, folds=10)[-1])
print("time:", time.time() - t)
print()

# Not bad, accuracy is about the same but performance is 3x faster,
# because each document is now a "4-word summary" of the original review.
# Print basic corpus statistics: document count, vocabulary size,
# and the average number of terms per document.
print("number of documents:", len(corpus))
print("number of words:", len(corpus.vector))
# float() kept so the average is true division on any Python version.
print("number of words (average):", sum(
    len(d.terms) for d in corpus.documents) / float(len(corpus)))
print()

# This may be too much words for some clustering algorithms (e.g., hierarchical).
# We'll reduce the documents to vectors of 4 concepts.
# First, let's test how the corpus would perform as a classifier.
# The details of KNN are not that important right now, just observe the numbers.
# Naturally, we want accuracy to stay the same after LSA reduction,
# and hopefully decrease the time needed to run.
t = time.time()
print("accuracy:", KNN.test(corpus, folds=10)[-1])
print("time:", time.time() - t)
print()

# Reduce the documents to vectors of 4 concepts (= 1/7 of 30 words).
print("LSA reduction...")
print()
corpus.reduce(4)

# Re-run the same cross-validation on the reduced corpus.
t = time.time()
print("accuracy:", KNN.test(corpus, folds=10)[-1])
print("time:", time.time() - t)
print()

# Not bad, accuracy is about the same but performance is 3x faster,
# because each document is now a "4-word summary" of the original review.
# Tweet sentiment classification: build a TF-IDF corpus of labeled tweets,
# keep the top 1000 features by information gain, and cross-validate a
# KNN classifier with cosine distance.
print('Number of Negative Tweets:', len(neg_lines))
print('Number of Positive Tweets:', len(pos_lines))

# Build labeled documents: type '0' = negative, type '1' = positive.
# Stopwords are removed and terms are stemmed with the Porter stemmer.
documents = []
for line in neg_lines:
    documents.append(Document(line, stopword=True, stemmer=PORTER, type='0'))
for line in pos_lines:
    documents.append(Document(line, stopword=True, stemmer=PORTER, type='1'))

corpus = Corpus(documents, weight=TFIDF)
print("number of documents:", len(corpus))
print("number of words:", len(corpus.vector))
# float() kept so the average is true division on any Python version.
print("number of words (average):", sum(len(d.terms) for d in corpus.documents) / float(len(corpus)))
print()

# Filtering top 1000 features using Information Gain Criterion.
corpus = corpus.filter(features=(corpus.feature_selection(top=1000, method=IG)))

# To test the accuracy of a classifier, using 10-fold cross-validation.
# This yields 4 scores: Accuracy, Precision, Recall and F-score.
print('classifying using KNN')
print('-------------------------')
# Fixed typo in the printed header ("REcall" -> "Recall").
print('(Accuracy, Precision,Recall,F-Measure)')
print(KNN.test(corpus, k=100, folds=10, distance=COSINE))

f_neg.close()
f_pos.close()