# Alternative test set (2013 gold standard); uncomment to evaluate against it.
# test_path = 'Data/Semeval/TestSet/2013/twitter-test-GOLD-B.tsv'

print(''.join(['Training/Testing using the ', str(var.model_classifier), ' classifier']))
print('Reading Datasets and Pre-processing...')

# Load the three corpora through the library's reader and keep a handle on
# each split.
semeval = SemevalTwitter(train_path, dev_path, test_path)
trainset, devset, testset = semeval.trainset, semeval.devset, semeval.testset

# The supervised model is trained from (tweet_message, label) pairs.
print('Training...')
tweets = [(tweet['MESSAGE'], tweet['SENTIMENT']) for tweet in trainset]
# The development set is deliberately left out of training in this variant:
# tweets += [(tweet['MESSAGE'], tweet['SENTIMENT']) for tweet in devset]
classifier = TwitterHybridClassifier(tweets)

# Everything below prepares the bookkeeping for the test pass.
print('Testing...')

# Per-method tally of how many instances each stage classified.
# RB: rule-based, LB: lexicon-based, ML: machine-learning classifier.
count = dict.fromkeys(('RB', 'LB', 'ML'), 0)

# Predicted and reference labels, filled when a gold standard is available.
guess = []
gold = []

# Accumulates the textual predictions.
output = ''
# Train the hybrid classifier on the train+dev data and write one prediction
# row per tweet of the Twitter test set to the task output file.
from SemevalTwitter import SemevalTwitter
from TwitterHybridClassifier import TwitterHybridClassifier

train_path = 'Data/tweeti-b.dist.data'
dev_path = 'Data/twitter-dev-gold-B.tsv'
test_path = 'Data/twitter-test-input-B.tsv'

semeval = SemevalTwitter(train_path, dev_path, test_path)
trainset = semeval.trainset
devset = semeval.devset
testset = semeval.testset

# Training the supervised model
# print() with a single argument behaves the same on Python 2 and Python 3.
print("Training...")
classifier = TwitterHybridClassifier(trainset + devset)

# Apply the classifier for all tweets in the testset
output_file = 'task2-TEAM-B-twitter-constrained.output'
# The context manager closes the file even if classification raises part-way
# through the test set.
with open(output_file, 'w') as fp:
    for num, tweet in enumerate(testset):
        print("Processing... %d" % num)
        tweet_class = classifier.classify(tweet['MESSAGE'])
        # Row layout: SID <TAB> UID <TAB> predicted class <TAB> message.
        # NOTE(review): no newline is appended here; presumably MESSAGE still
        # carries its own line terminator -- confirm against SemevalTwitter.
        line = tweet['SID'] + '\t' + tweet['UID'] + '\t' + tweet_class + '\t' + tweet['MESSAGE']
        fp.write(line)

# Apply the classifier for all sms data in the testset
train_path = 'Data/tweeti-b.dist.data'
dev_path = 'Data/twitter-dev-gold-B.tsv'
correct += 1 accuracy = float(correct) / float(total) print('Accuracy: {:.2%}'.format(accuracy)) # Confusion Matrix cm = ConfusionMatrix(gold, guess) print(cm) f = codecs.open('input.txt', 'r+', encoding='utf8') lines = f.readlines() f1 = codecs.open('output.txt', 'r+', encoding='utf8') lines1 = f1.readlines() Myobject = TwitterHybridClassifier(trainset) #count = {'RB':0, 'LB':0, 'ML':0 } observed = list() answer = list() for line in lines: x = line.split('\t') prediction = Myobject.classify(x[5]) if (len(prediction) == 1): result = prediction[0][0] elif (len(prediction) == 2):
# Alternative test set (2013 gold standard); uncomment to evaluate against it.
# test_path = 'Data/Semeval/TestSet/2013/twitter-test-GOLD-B.tsv'

print('Reading Datasets and Pre-processing...')

# Load the three corpora through the library's reader and keep a handle on
# each split.
semeval = SemevalTwitter(train_path, dev_path, test_path)
trainset, devset, testset = semeval.trainset, semeval.devset, semeval.testset

# The supervised model is trained from (tweet_message, label) pairs taken
# from the training set first and the development set second.
print('Training...')
tweets = [(tweet['MESSAGE'], tweet['SENTIMENT']) for tweet in trainset]
tweets.extend([(tweet['MESSAGE'], tweet['SENTIMENT']) for tweet in devset])
classifier = TwitterHybridClassifier(tweets)

# Everything below prepares the bookkeeping for the test pass.
print('Testing...')

# Per-method tally of how many instances each stage classified.
# RB: rule-based, LB: lexicon-based, ML: machine-learning classifier.
count = dict.fromkeys(('RB', 'LB', 'ML'), 0)

# Predicted and reference labels, filled when a gold standard is available.
guess = []
gold = []

# Accumulates the textual predictions.
output = ''
# Train the hybrid classifier on the train+dev data and write one prediction
# row per tweet of the Twitter test set to the task output file.
from SemevalTwitter import SemevalTwitter
from TwitterHybridClassifier import TwitterHybridClassifier

train_path = 'Data/tweeti-b.dist.data'
dev_path = 'Data/twitter-dev-gold-B.tsv'
test_path = 'Data/twitter-test-input-B.tsv'

semeval = SemevalTwitter(train_path, dev_path, test_path)
trainset = semeval.trainset
devset = semeval.devset
testset = semeval.testset

# Training the supervised model
# print() with a single argument behaves the same on Python 2 and Python 3.
print("Training...")
classifier = TwitterHybridClassifier(trainset + devset)

# Apply the classifier for all tweets in the testset
output_file = 'task2-TEAM-B-twitter-constrained.output'
# Using "with" guarantees the handle is released even when classify() or a
# missing tweet field raises in the middle of the loop.
with open(output_file, 'w') as fp:
    for num, tweet in enumerate(testset):
        print("Processing... %d" % num)
        tweet_class = classifier.classify(tweet['MESSAGE'])
        # Row layout: SID <TAB> UID <TAB> predicted class <TAB> message.
        # NOTE(review): no newline is appended here; presumably MESSAGE still
        # carries its own line terminator -- confirm against SemevalTwitter.
        line = tweet['SID'] + '\t' + tweet[
            'UID'] + '\t' + tweet_class + '\t' + tweet['MESSAGE']
        fp.write(line)

# Apply the classifier for all sms data in the testset
train_path = 'Data/tweeti-b.dist.data'
dev_path = 'Data/twitter-dev-gold-B.tsv'