# TestSet from 2013
#test_path  = 'Data/Semeval/TestSet/2013/twitter-test-GOLD-B.tsv'
# Report the value of var.model_classifier for this run
print('Training/Testing using the {!s} classifier'.format(
    var.model_classifier))
print('Reading Datasets and Pre-processing...')
# Load the train/dev/test splits through the SemevalTwitter reader
semeval = SemevalTwitter(train_path, dev_path, test_path)
trainset, devset, testset = (
    semeval.trainset, semeval.devset, semeval.testset)

# Train the supervised model on (tweet_message, label) pairs
print('Training...')
tweets = [
    (tweet['MESSAGE'], tweet['SENTIMENT'])
    for tweet in trainset
]
#tweets += [(tweet['MESSAGE'],tweet['SENTIMENT']) for tweet in devset]
classifier = TwitterHybridClassifier(tweets)

# Classify every tweet in the testset
print('Testing...')

# Per-method tally of classified instances
# RB: Rule-based, LB: Lexicon-based, ML: Machine Learning classifier
count = dict.fromkeys(('RB', 'LB', 'ML'), 0)

# Predicted and gold labels, kept for evaluation against the gold standard
guess, gold = [], []

# Accumulates the predictions as a single string
output = ''
from SemevalTwitter import SemevalTwitter
from TwitterHybridClassifier import TwitterHybridClassifier

# Input file paths handed to the SemevalTwitter reader.
train_path = 'Data/tweeti-b.dist.data'
dev_path = 'Data/twitter-dev-gold-B.tsv'
test_path = 'Data/twitter-test-input-B.tsv'

# Load the three datasets.
semeval = SemevalTwitter(train_path, dev_path, test_path)
trainset = semeval.trainset
devset = semeval.devset
testset = semeval.testset

# Training the supervised model on trainset + devset.
# A single-argument print(...) emits the same text on Python 2 and Python 3,
# unlike the Python-2-only print statement.
print("Training...")
classifier = TwitterHybridClassifier(trainset + devset)

# Apply the classifier for all tweets in the testset
output_file = 'task2-TEAM-B-twitter-constrained.output'
# The context manager closes the file even if classification raises midway.
with open(output_file, 'w') as fp:
    for num, tweet in enumerate(testset):
        # Progress indicator: index of the tweet being classified.
        print("Processing... %s" % num)
        tweet_class = classifier.classify(tweet['MESSAGE'])
        # One tab-separated record: SID, UID, predicted class, message text.
        # NOTE(review): no newline is appended; presumably tweet['MESSAGE']
        # still carries its trailing line break -- confirm against the reader.
        line = '\t'.join(
            (tweet['SID'], tweet['UID'], tweet_class, tweet['MESSAGE']))
        fp.write(line)


# Apply the classifier for all sms data in the testset
# NOTE(review): both paths below repeat the values already assigned to
# train_path and dev_path earlier in this script; presumably only the test
# path differs for the SMS run -- confirm.
train_path='Data/tweeti-b.dist.data'
dev_path='Data/twitter-dev-gold-B.tsv'
            correct += 1
    accuracy = float(correct) / float(total)
    print('Accuracy: {:.2%}'.format(accuracy))

    # Confusion Matrix
    cm = ConfusionMatrix(gold, guess)
    print(cm)


# Read every line of the UTF-8 encoded 'input.txt' into memory.
# NOTE(review): the file is opened 'r+' (read/write) although only
# readlines() is called on it in the visible code, and the handle is never
# closed here -- confirm whether a later write relies on it.
f = codecs.open('input.txt', 'r+', encoding='utf8')
lines = f.readlines()

# Same treatment for 'output.txt': open read/write as UTF-8 and load all lines.
f1 = codecs.open('output.txt', 'r+', encoding='utf8')
lines1 = f1.readlines()

# Build a classifier from trainset only (devset is not included here).
Myobject = TwitterHybridClassifier(trainset)

#count = {'RB':0, 'LB':0, 'ML':0 }

# Empty accumulators; presumably predicted vs. expected labels -- TODO confirm.
observed = list()
answer = list()

for line in lines:

    x = line.split('\t')

    prediction = Myobject.classify(x[5])

    if (len(prediction) == 1):
        result = prediction[0][0]
    elif (len(prediction) == 2):
# TestSet from 2013
#test_path  = 'Data/Semeval/TestSet/2013/twitter-test-GOLD-B.tsv'

print('Reading Datasets and Pre-processing...')
# Load the train/dev/test splits through the SemevalTwitter reader
semeval = SemevalTwitter(train_path, dev_path, test_path)
trainset, devset, testset = (
    semeval.trainset, semeval.devset, semeval.testset)

# Train the supervised model on (tweet_message, label) pairs taken from the
# training set followed by the development set
print('Training...')
tweets = [
    (tweet['MESSAGE'], tweet['SENTIMENT'])
    for tweet in trainset
]
tweets.extend([
    (tweet['MESSAGE'], tweet['SENTIMENT'])
    for tweet in devset
])
classifier = TwitterHybridClassifier(tweets)


# Classify every tweet in the testset
print('Testing...')

# Per-method tally of classified instances
# RB: Rule-based, LB: Lexicon-based, ML: Machine Learning classifier
count = dict.fromkeys(('RB', 'LB', 'ML'), 0)

# Predicted and gold labels, kept for evaluation against the gold standard
guess, gold = [], []

# Accumulates the predictions as a single string
output = ''
from SemevalTwitter import SemevalTwitter
from TwitterHybridClassifier import TwitterHybridClassifier

# Input file paths handed to the SemevalTwitter reader.
train_path = 'Data/tweeti-b.dist.data'
dev_path = 'Data/twitter-dev-gold-B.tsv'
test_path = 'Data/twitter-test-input-B.tsv'

# Load the three datasets.
semeval = SemevalTwitter(train_path, dev_path, test_path)
trainset = semeval.trainset
devset = semeval.devset
testset = semeval.testset

# Training the supervised model on trainset + devset.
# A single-argument print(...) emits the same text on Python 2 and Python 3,
# unlike the Python-2-only print statement.
print("Training...")
classifier = TwitterHybridClassifier(trainset + devset)

# Apply the classifier for all tweets in the testset
output_file = 'task2-TEAM-B-twitter-constrained.output'
# The context manager closes the file even if classification raises midway.
with open(output_file, 'w') as fp:
    for num, tweet in enumerate(testset):
        # Progress indicator: index of the tweet being classified.
        print("Processing... %s" % num)
        tweet_class = classifier.classify(tweet['MESSAGE'])
        # One tab-separated record: SID, UID, predicted class, message text.
        # NOTE(review): no newline is appended; presumably tweet['MESSAGE']
        # still carries its trailing line break -- confirm against the reader.
        line = '\t'.join(
            (tweet['SID'], tweet['UID'], tweet_class, tweet['MESSAGE']))
        fp.write(line)

# Apply the classifier for all sms data in the testset
# NOTE(review): both paths below repeat the values already assigned to
# train_path and dev_path earlier in this script; presumably only the test
# path differs for the SMS run -- confirm.
train_path = 'Data/tweeti-b.dist.data'
dev_path = 'Data/twitter-dev-gold-B.tsv'