Example #1
from pattern.web    import Twitter
from pattern.en     import Sentence, parse
from pattern.search import search
from pattern.vector import Document, Corpus, KNN

corpus = Corpus()

# Search 14 pages of 100 tweets each (up to 1400 tweets) for the
# #win and #fail hashtags.
for i in range(1, 15):
    for tweet in Twitter().search('#win OR #fail', start=i, count=100):
        # Label the tweet 'WIN' if it contains #win, 'FAIL' otherwise.
        p = 'WIN' if '#win' in tweet.description.lower() else 'FAIL'
        s = tweet.description.lower()
        # parse() annotates each word with its part-of-speech tag.
        s = Sentence(parse(s))
        # Keep only the adjectives in the tweet (JJ = adjective).
        s = search('JJ', s)
        s = [match[0].string for match in s]
        s = ' '.join(s)
        if len(s) > 0:
            corpus.append(Document(s, type=p))

classifier = KNN()  # k-nearest neighbor classifier (k-NN)

# Each document is an unordered bag of adjectives from one tweet;
# training on them teaches the classifier which adjectives co-occur
# with WIN and which with FAIL.
for document in corpus:
    classifier.train(document)

objects = []
objects.append(classifier.classify('awesome'))  # predicts 'awesome' as WIN
objects.append(classifier.classify('cool'))     # predicts 'cool' as WIN
objects.append(classifier.classify('damn'))     # predicts 'damn' as FAIL
objects.append(classifier.classify('sucks'))    # predicts 'sucks' as FAIL

print objects
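
# A quick sanity check (a sketch, not part of the original example):
# pattern's classifiers expose a test() class method that runs k-fold
# cross-validation, as Example #2 below does with Bayes. It yields
# (accuracy, precision, recall, F1-score).
print KNN.test(corpus.documents, folds=10)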
Example #2
import os
import glob

from pattern.vector import Document, Corpus, Bayes, TFIDF, Vector, kdtree


#from pattern.web import PDF
##pdf = PDF(open("/users/tom/downloads/10-1.1.1.61.7217.pdf", "rb").read())
#pdf = PDF(open("/users/tom/downloads/10-1.1.1.14.8422.pdf", "rb").read())
#print Document(unicode(pdf), threshold=1).keywords(30)
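
# A self-contained sketch of the same keywords() call on a plain string
# (the sample text here is made up; threshold=1 keeps only words that
# occur more than once):
print Document(u"The quick brown fox jumps over the lazy dog. "
               u"The fox is quick and the dog is lazy.", threshold=1).keywords(5)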

corpus = Corpus()
for product in glob.glob(os.path.join("reviews", "*")):
    for review in glob.glob(os.path.join(product, "*.txt")):
        # The label is derived from the file name: reviews with "yes"
        # in their name are positive.
        polarity = "yes" in review
        s = open(review).read()
        corpus.append(Document(s, type=polarity, top=50, threshold=2))

#print "testtree"
#V = lambda x: Vector(dict(enumerate(x)))
#v = [(2,3), (5,4), (9,6), (4,7), (8,1), (7,2)]
#v = [V(x) for x in v]
#t = kdtree(v)
#print t.nn(V((9,5)))
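
# A live sketch of the commented-out kd-tree test above: index a handful
# of 2-D points as sparse Vectors and query the nearest neighbor of (9, 5).
V = lambda x: Vector(dict(enumerate(x)))
points = [V(p) for p in [(2, 3), (5, 4), (9, 6), (4, 7), (8, 1), (7, 2)]]
print kdtree(points).nn(V((9, 5)))  # nearest point should be (9, 6)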

# Re-weight the documents with tf-idf.
corpus = Corpus(corpus.documents, weight=TFIDF)
print "number of documents:", len(corpus)
print "number of words:", len(corpus.vector)
print "number of words (average):", sum(len(d.terms) for d in corpus.documents) / float(len(corpus))
print

classifier = Bayes(aligned=True)
for document in corpus:
    classifier.train(document, type=document.type)
print 'Done training'

# To test the accuracy of a classifier, use 10-fold cross-validation.
# This yields four scores: accuracy, precision, recall and F-measure.
print 'Bayes Classifier'
print '-------------------------'
print '(Accuracy, Precision, Recall, F-Measure)'
print Bayes.test(corpus.documents, folds=10)
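
# The same call can be unpacked into the four scores (a sketch; the
# variable names are ours, the tuple order follows the comment above):
accuracy, precision, recall, f1 = Bayes.test(corpus.documents, folds=10)
print 'accuracy: %.2f' % accuracy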

# Test on the sample dataset of 10 negative and 10 positive tweets.
ft = open('test_20', 'r')
for line in ft.readlines():
    t = Document(line)
    corpus.append(t)
    print line.strip(), ' ', str(classifier.classify(t))
ft.close()

Example #4
from pattern.web    import Twitter
from pattern.en     import Sentence, parse
from pattern.search import search
from pattern.vector import Document, Corpus, KNN

corpus = Corpus()

# First, we mine a corpus of tweets: 9 pages of 100 results each.
# We'll use the hashtags as type.
for page in range(1, 10):
    for tweet in Twitter().search('#win OR #fail', start=page, count=100, cached=True):
        # If the tweet contains the #win hashtag, we'll set its type to 'WIN':
        p = 'WIN' if '#win' in tweet.description.lower() else 'FAIL'
        s = tweet.description.lower()        # tweet in lowercase
        s = Sentence(parse(s))               # parse tree with part-of-speech tags
        s = search('JJ', s)                  # adjectives in the tweet
        s = [match[0].string for match in s] # adjectives as a list of strings
        s = " ".join(s)                      # adjectives as string
        if len(s) > 0:
            corpus.append(Document(s, type=p, stemmer=None))

# Train k-nearest neighbor on the corpus.
# Note that this is only a simple example: to build a robust classifier
# you would need a lot more training data (e.g., tens of thousands of tweets).
# The more training data, the more statistically reliable the classifier becomes.
# The only way to really know if your classifier is working correctly
# is to test it with testing data; see the documentation for Classifier.test().
classifier = KNN()
for document in corpus:
    classifier.train(document)

# These are the adjectives the classifier has learned:
print sorted(classifier.terms)
print
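
# As a follow-up sketch (mirroring Example #1), the trained classifier
# can now predict a label for unseen adjectives:
print classifier.classify('awesome')  # presumably 'WIN'
print classifier.classify('sucks')    # presumably 'FAIL'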
Example #5
from pattern.web    import Twitter
from pattern.en     import Sentence, parse
from pattern.search import search
from pattern.vector import Document, Corpus, KNN

corpus = Corpus()

# First, we mine a corpus of tweets: 5 pages of 100 results each.
# We'll use the hashtags as type.
for page in range(1, 6):
    for tweet in Twitter().search('#win OR #fail', start=page, count=100, cached=False):
        # If the tweet contains the #win hashtag, we'll set its type to 'WIN':
        p = 'WIN' if '#win' in tweet.description.lower() else 'FAIL'
        s = tweet.description.lower()        # tweet in lowercase
        s = Sentence(parse(s))               # parse tree with part-of-speech tags
        s = search('JJ', s)                  # adjectives in the tweet
        s = [match[0].string for match in s] # adjectives as a list of strings
        s = " ".join(s)                      # adjectives as string
        if len(s) > 0:
            corpus.append(Document(s, type=p, threshold=0, stemmer=None))

# Train k-nearest neighbor on the corpus.
# Note that this is only a simple example: to build a robust classifier
# you would need a lot more training data (e.g., tens of thousands of tweets).
# The more training data, the more statistically reliable the classifier becomes.
# The only way to really know if your classifier is working correctly
# is to test it with testing data; see the documentation for Classifier.test().
classifier = KNN()
for document in corpus:
    classifier.train(document)

# These are the words the classifier has learned:
print classifier.terms
print
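
# A sketch of classifying a new, unseen message with the same pipeline
# (the sample sentence is made up):
s = Sentence(parse('this game is awesome and cool'))
s = ' '.join(match[0].string for match in search('JJ', s))
print classifier.classify(s)  # presumably 'WIN'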