Ejemplo n.º 1
0
def main():
	"""Train a Maxent classifier on the rated tweets in 'validation_set.pkl'.

	The pickle is expected to hold an iterable of (tweet, rating) pairs.
	Each numeric rating is mapped to a sentiment label: 'positive' when
	it is > 0, 'negative' when it is < 0, and 'neutral' otherwise. Pairs
	whose rating cannot be converted to a float are skipped.

	The 1000 most frequent tokens across the kept tweets become the
	classifier's feature list, and the model is trained on the labelled
	pairs.

	Returns:
		The Classifier instance with `feature_list` and `classifier` set.
	"""
	me = Classifier()

	# NOTE(security): unpickling can execute arbitrary code; only load
	# pickle files that come from a trusted source.
	with open('validation_set.pkl', 'rb') as handle:
		feature_set = pickle.load(handle)

	feature_set_labels = []
	for tweet, rating in feature_set:
		print(rating)
		try:
			score = float(rating)
		except (TypeError, ValueError, OverflowError):
			# Non-numeric rating: this row cannot be labelled, so drop it.
			continue
		if score > 0:
			label = 'positive'
		elif score < 0:
			label = 'negative'
		else:
			label = 'neutral'
		feature_set_labels.append((tweet, label))

	# Token frequencies over every labelled tweet.
	feature_counter = Counter(chain.from_iterable(
		word_tokenize(process_tweet(tweet))
		for tweet, _ in feature_set_labels
	))
	me.feature_list = [feat for feat, _ in feature_counter.most_common(1000)]

	# Train on the derived sentiment labels (not the raw ratings), so that
	# skipped rows stay excluded and the classes are the three labels above.
	ts = [(me.extract_features(tweet), label)
		for tweet, label in feature_set_labels]
	print('training Maxent')
	me.classifier = MaxentClassifier.train(ts)
	return me
Ejemplo n.º 2
0
def main():
	"""Train a Maxent classifier on the pairs in 'undersampled_emoticon.pkl'.

	The pickle is expected to hold a re-iterable collection of
	(tweet, label) pairs; it is traversed twice, once to count tokens and
	once to build the training set. The 1000 most frequent tokens across
	all tweets become the classifier's feature list.

	Returns:
		The Classifier instance with `feature_list` and `classifier` set.
	"""
	me = Classifier()

	# NOTE(security): unpickling can execute arbitrary code; only load
	# pickle files that come from a trusted source.
	with open('undersampled_emoticon.pkl', 'rb') as handle:
		feature_set = pickle.load(handle)

	# Token frequencies over every tweet in the data set.
	feature_counter = Counter(chain.from_iterable(
		word_tokenize(process_tweet(tweet)) for tweet, _ in feature_set
	))
	me.feature_list = [feat for feat, _ in feature_counter.most_common(1000)]

	ts = [(me.extract_features(tweet), label) for tweet, label in feature_set]
	# NOTE(review): the message mentions CG, but no algorithm is passed to
	# train(); whichever default the installed NLTK picks is used - confirm.
	print('training Maxent, algorithm CG')
	me.classifier = MaxentClassifier.train(ts)
	return me