# Reduce the tweet text to its adjectives (Penn Treebank tag JJ = adjective)
# and use the space-joined adjectives as the training document's content.
s = search('JJ', s)                  # pattern search: adjective matches in s
s = [match[0].string for match in s] # first word of each match, as plain text
s = ' '.join(s)
if len(s) > 0:
    # NOTE(review): the same text is appended twice, once as type=p and once
    # as type=m. Training one document under both labels looks like a bug —
    # confirm which label this tweet should actually carry.
    corpus.append(Document(s, type=p))
    corpus.append(Document(s, type=m))

# k-nearest neighbor (k-NN) classifier. Documents are an unordered bag of
# sentences; the adjective vectors in the corpus train the classifier.
classifier = KNN()
objects = []
for document in corpus:
    classifier.train(document)

objects.append(classifier.classify('awesome')) # expected to predict 'WIN'
objects.append(classifier.classify('cool'))    # expected to predict 'WIN'
objects.append(classifier.classify('damn'))    # expected to predict 'FAIL'
objects.append(classifier.classify('sucks'))   # expected to predict 'FAIL'
print(objects)

# Tally the predicted labels; anything other than 'WIN'/'FAIL'
# (e.g. None for unrecognized words) is ignored.
wincounter = 0
failcounter = 0
for thing in objects:
    if thing == 'WIN':
        wincounter += 1
    elif thing == 'FAIL':
        failcounter += 1
    else:
        pass
# is to test it with testing data; see the documentation for Classifier.test().
classifier = KNN(baseline=None)  # By default, baseline=MAJORITY
# (classify unknown documents with the most frequent type).
for document in m:
    classifier.train(document)

# These are the adjectives the classifier has learned:
print(sorted(classifier.features))
print()

# We can now ask it to classify documents containing these words.
# Note that you may get different results than the ones below,
# since you will be mining other (more recent) tweets.
# Again, a robust classifier needs lots and lots of training data.
# If None is returned, the word was not recognized,
# and the classifier returned the default value (see above).
print(classifier.classify('sweet potato burger'))  # yields 'WIN'
print(classifier.classify('stupid autocorrect'))   # yields 'FAIL'

# "What can I do with it?"
# In the scientific community, classifiers have been used to predict:
# - the opinion (positive/negative) in product reviews on blogs,
# - the age of users posting on social networks,
# - the author of medieval poems,
# - spam in e-mail messages,
# - lies & deception in text,
# - doubt & uncertainty in text,
# and to:
# - improve search engine query results (e.g., where "jeans" queries also yield "denim" results),
# - win at Jeopardy!,
# - win at rock-paper-scissors,
# and so on...
# Train a k-NN classifier without a fallback class for unknown documents.
classifier = KNN(baseline=None)  # By default, baseline=MAJORITY
# (classify unknown documents with the most frequent type).
for document in m:
    classifier.train(document)

# These are the adjectives the classifier has learned:
print(sorted(classifier.features))
print()

# We can now ask it to classify documents containing these words.
# Note that you may get different results than the ones below,
# since you will be mining other (more recent) tweets.
# Again, a robust classifier needs lots and lots of training data.
# If None is returned, the word was not recognized,
# and the classifier returned the default value (see above).
print(classifier.classify('sweet potato burger'))  # yields 'WIN'
print(classifier.classify('stupid autocorrect'))   # yields 'FAIL'

# "What can I do with it?"
# In the scientific community, classifiers have been used to predict:
# - the opinion (positive/negative) in product reviews on blogs,
# - the age of users posting on social networks,
# - the author of medieval poems,
# - spam in e-mail messages,
# - lies & deception in text,
# - doubt & uncertainty in text,
# and to:
# - improve search engine query results (e.g., where "jeans" queries also yield "denim" results),
# - win at Jeopardy!,
# - win at rock-paper-scissors,
# and so on...
# is to test it with testing data; see the documentation for Classifier.test().
classifier = KNN(baseline=None)  # By default, baseline=MAJORITY
# (classify unknown documents with the most frequent type).
for document in m:
    classifier.train(document)

# These are the adjectives the classifier has learned:
print(sorted(classifier.features))
print()

# We can now ask it to classify documents containing these words.
# Note that you may get different results than the ones below,
# since you will be mining other (more recent) tweets.
# Again, a robust classifier needs lots and lots of training data.
# If None is returned, the word was not recognized,
# and the classifier returned the default value (see above).
print(classifier.classify('sweet'))   # yields 'WIN'
print(classifier.classify('stupid'))  # yields 'FAIL'

# "What can I do with it?"
# In the scientific community, classifiers have been used to predict:
# - the opinion (positive/negative) in product reviews on blogs,
# - the age of users posting on social networks,
# - the author of medieval poems,
# - spam in e-mail messages,
# - lies & deception in text,
# - doubt & uncertainty in text,
# and to:
# - improve search engine query results (e.g., where "jeans" queries also yield "denim" results),
# - win at Jeopardy!,
# - win at rock-paper-scissors,
# and so on...
# you would need a lot more training data (e.g., tens of thousands of tweets).
# The more training data, the more statistically reliable the classifier becomes.
# The only way to really know if your classifier is working correctly
# is to test it with testing data; see the documentation for Classifier.test().
classifier = KNN()
for document in corpus:
    classifier.train(document)

# These are the adjectives the classifier has learned:
print(sorted(classifier.terms))
print()

# We can ask it to classify texts containing those words.
# Note that you may get different results than the ones indicated below,
# since you will be mining other (more recent) tweets.
# Again, a robust classifier needs lots and lots of training data.
print(classifier.classify('sweet'))   # yields 'WIN'
print(classifier.classify('stupid'))  # yields 'FAIL'

# "What can I do with it?"
# In the scientific community, classifiers have been used to predict
# - the author of medieval poems,
# - the opinion (positive/negative) in product reviews on blogs,
# - the age of users posting on social networks,
# - predict spam e-mail messages,
# - predict lies in text,
# - predict doubt and uncertainty in text,
# - improve search engine query results (e.g., where "jeans" queries also yield "denim" results),
# - to win at jeopardy,
# - to win at rock-paper-scissors,
# and so on...
# is to test it with testing data; see the documentation for Classifier.test().
classifier = KNN(baseline=None)  # By default, baseline=MAJORITY
# (classify unknown documents with the most frequent type).
for document in m:
    classifier.train(document)

# These are the adjectives the classifier has learned:
print(sorted(classifier.features))
print()

# We can now ask it to classify documents containing these words.
# Note that you may get different results than the ones below,
# since you will be mining other (more recent) tweets.
# Again, a robust classifier needs lots and lots of training data.
# If None is returned, the word was not recognized,
# and the classifier returned the default value (see above).
print(classifier.classify("sweet potato burger"))  # yields 'WIN'
print(classifier.classify("stupid autocorrect"))   # yields 'FAIL'

# "What can I do with it?"
# In the scientific community, classifiers have been used to predict:
# - the opinion (positive/negative) in product reviews on blogs,
# - the age of users posting on social networks,
# - the author of medieval poems,
# - spam in e-mail messages,
# - lies & deception in text,
# - doubt & uncertainty in text,
# and to:
# - improve search engine query results (e.g., where "jeans" queries also yield "denim" results),
# - win at Jeopardy!,
# - win at rock-paper-scissors,
# and so on...
# The more training data, the more statistically reliable the classifier becomes.
# The only way to really know if your classifier is working correctly
# is to test it with testing data; see the documentation for Classifier.test().
classifier = KNN()
for document in corpus:
    classifier.train(document)

# These are the adjectives the classifier has learned:
print(sorted(classifier.terms))
print()

# We can ask it to classify texts containing those words.
# Note that you may get different results than the ones indicated below,
# since you will be mining other (more recent) tweets.
# Again, a robust classifier needs lots and lots of training data.
print(classifier.classify("sweet"))   # yields 'WIN'
print(classifier.classify("stupid"))  # yields 'FAIL'

# "What can I do with it?"
# In the scientific community, classifiers have been used to predict
# - the author of medieval poems,
# - the opinion (positive/negative) in product reviews on blogs,
# - the age of users posting on social networks,
# - predict spam e-mail messages,
# - predict lies in text,
# - predict doubt and uncertainty in text,
# - improve search engine query results (e.g., where "jeans" queries also yield "denim" results),
# - to win at jeopardy,
# - to win at rock-paper-scissors,
# and so on...