def GET(self, request):
    """Classify request text with the per-account SVM classifier.

    Reads the raw ``text`` and ``id`` request parameters, extracts word
    features from the text and runs the account's trained classifier.

    Returns:
        The first predicted label; ``'empty'`` when the classifier
        yields no labels; ``'error'`` when no model is loaded or
        classification raises.
    """
    # Allow browser clients from any origin to call this endpoint.
    web.header('Access-Control-Allow-Origin', '*')
    if not web.svm:
        # No trained models have been loaded yet.
        return 'error'

    # Parse the request parameters once (the original re-parsed the
    # request for each field) and decode the raw bytes leniently.
    params = web.input(_unicode=False)
    text = params['text'].decode('utf-8', 'ignore')
    # Fall back to account 1 when the id decodes to 0.
    account_id = int(params['id'].decode('utf-8', 'ignore')) or 1

    print(u"Classifying text [{0}]".format(text))
    # A literal one-element list is always non-empty, so the original
    # `if features_list and len(features_list) > 0` guard was dead code.
    features_list = [features.tweet_to_words(text)]
    try:
        labels = web.svm[account_id].classify_many(features_list)
        if labels:
            return labels[0]
        return 'empty'
    except Exception:
        # `except Exception, e` is Python-2-only syntax and `e` was
        # unused; log the failure but keep the endpoint responsive.
        traceback.print_exc()
        return 'error'
def GET(self, request):
    """Classify request text with the per-account SVM classifier.

    Reads the raw ``text`` and ``id`` request parameters, extracts word
    features from the text and runs the account's trained classifier.

    Returns:
        The first predicted label; ``'empty'`` when the classifier
        yields no labels; ``'error'`` when no model is loaded or
        classification raises.
    """
    # Allow browser clients from any origin to call this endpoint.
    web.header('Access-Control-Allow-Origin', '*')
    if not web.svm:
        # No trained models have been loaded yet.
        return 'error'

    # Parse the request parameters once (the original re-parsed the
    # request for each field) and decode the raw bytes leniently.
    params = web.input(_unicode=False)
    text = params['text'].decode('utf-8', 'ignore')
    # Fall back to account 1 when the id decodes to 0.
    account_id = int(params['id'].decode('utf-8', 'ignore')) or 1

    print(u"Classifying text [{0}]".format(text))
    # A literal one-element list is always non-empty, so the original
    # `if features_list and len(features_list) > 0` guard was dead code.
    features_list = [features.tweet_to_words(text)]
    try:
        labels = web.svm[account_id].classify_many(features_list)
        if labels:
            return labels[0]
        return 'empty'
    except Exception:
        # `except Exception, e` is Python-2-only syntax and `e` was
        # unused; log the failure but keep the endpoint responsive.
        traceback.print_exc()
        return 'error'
if not w.lower() in stopwords.words('english') ) word_features = list(all_words)[:500] nltk.FreqDist.pprint(all_words, 500) return word_features print(decisions.categories()) documents = [(list(decisions.words(fileid)), category) for category in decisions.categories() for fileid in decisions.fileids(category)] #random.shuffle(documents) print (documents) pos_features = [(features.tweet_to_words(d), c) for (d, c) in documents if c == 'pos'] neg_features = [(features.tweet_to_words(d), c) for (d, c) in documents if c == 'neg'] random.shuffle(pos_features) random.shuffle(neg_features) chosen_features_200 = pos_features[:100] + neg_features[:100] random.shuffle(chosen_features_200) featuresets = chosen_features_200 size = 200 train_set, test_set = featuresets[size / 2:], featuresets[:size / 2] classifier = nltk.DecisionTreeClassifier.train(train_set) print("Decision trees accuracy = [{0}]".format(
home = os.path.expanduser("~") path = os.path.join( home, 'nltk_data{s}corpora{s}tweets_publish_choice_{account}{s}{version}'.format( account=account_id, version=version, s=os.sep, )) print(decisions.categories()) documents = [(list(decisions.words(fileid)), category) for category in decisions.categories() for fileid in decisions.fileids(category) if os.path.getsize(os.path.join(path, fileid)) > 0] pos_features = [(features.tweet_to_words(d), c) for (d, c) in documents if c == 'pos'] neg_features = [(features.tweet_to_words(d), c) for (d, c) in documents if c == 'neg'] chosen_features_limit = pos_features[:limit / 2] + neg_features[:limit / 2] random.shuffle(chosen_features_limit) featuresets = chosen_features_limit train_set, test_set = featuresets[limit / 2:], featuresets[:limit / 2] svm = SklearnClassifier(LinearSVC()) svm.train(train_set) path = os.path.normpath('../model/svm/account_{0}/{1}/'.format(