コード例 #1
0
# Read every line of output.txt (decoded as UTF-8) into memory.
f1 = codecs.open('output.txt', 'r+', encoding='utf8')
lines1 = f1.readlines()

# Classifier built from the training set.
Myobject = TwitterHybridClassifier(trainset)

observed = []
answer = []

for line in lines:
    # The text handed to the classifier is the sixth tab-separated field.
    columns = line.split('\t')
    prediction = Myobject.classify(columns[5])

    # One entry -> index 0, two entries -> index 1, any other length ->
    # index 2. The first element of the chosen entry is recorded.
    # NOTE(review): presumably each entry is a (label, method) pair -- confirm
    # against TwitterHybridClassifier.classify.
    slot = {1: 0, 2: 1}.get(len(prediction), 2)
    result = prediction[slot][0]

    observed.append(result)

# Only strips surrounding whitespace from each line of output.txt; the
# stripped value is not stored anywhere in the visible code.
for line in lines1:
    line = line.strip()
# Load the corpora from the configured paths and keep the three splits the
# loader exposes.
semeval = SemevalTwitter(train_path, dev_path, test_path)
trainset = semeval.trainset
devset = semeval.devset
testset = semeval.testset

# Training the supervised model on the train and dev splits combined with `+`.
# A parenthesised single-argument print emits the same text under Python 2
# and also parses under Python 3.
print("Training...")
classifier = TwitterHybridClassifier(trainset + devset)

# Apply the classifier for all tweets in the testset
output_file = 'task2-TEAM-B-twitter-constrained.output'
# The with-block guarantees the file is closed even if classification or a
# write fails part-way through the test set.
with open(output_file, 'w') as fp:
    for num, tweet in enumerate(testset):
        # Same console output as the former `print "Processing...", num`.
        print("Processing... %d" % num)
        tweet_class = classifier.classify(tweet['MESSAGE'])
        # One tab-separated record: SID, UID, predicted class, message.
        # NOTE(review): no newline is appended here; presumably MESSAGE
        # already ends with one -- confirm against the loader.
        line = '\t'.join(
            (tweet['SID'], tweet['UID'], tweet_class, tweet['MESSAGE']))
        fp.write(line)


# Apply the classifier for all sms data in the testset
# Judging by the file names, train_path/dev_path still point at Twitter data
# while test_path points at the SMS input file.
train_path='Data/tweeti-b.dist.data'
dev_path='Data/twitter-dev-gold-B.tsv'
test_path='Data/sms-test-input-B.tsv'

# Build a loader over these paths; only its testset is taken here
# (trainset/devset are not re-read from it in this fragment).
semeval = SemevalTwitter(train_path,dev_path,test_path)
testset = semeval.testset

# Open the SMS results file for writing.
# NOTE(review): the code that writes to and closes fp is not visible in this
# fragment -- confirm the handle is closed.
output_file = 'task2-TEAM-B-sms-constrained.output'
fp = open(output_file,'w')
# Load the corpora from the configured paths and keep the three splits the
# loader exposes.
semeval = SemevalTwitter(train_path, dev_path, test_path)
trainset = semeval.trainset
devset = semeval.devset
testset = semeval.testset

# Training the supervised model on the train and dev splits combined with `+`.
# A parenthesised single-argument print emits the same text under Python 2
# and also parses under Python 3.
print("Training...")
classifier = TwitterHybridClassifier(trainset + devset)

# Apply the classifier for all tweets in the testset
output_file = 'task2-TEAM-B-twitter-constrained.output'
# The with-block guarantees the file is closed even if classification or a
# write fails part-way through the test set.
with open(output_file, 'w') as fp:
    for num, tweet in enumerate(testset):
        # Same console output as the former `print "Processing...", num`.
        print("Processing... %d" % num)
        tweet_class = classifier.classify(tweet['MESSAGE'])
        # One tab-separated record: SID, UID, predicted class, message.
        # NOTE(review): no newline is appended here; presumably MESSAGE
        # already ends with one -- confirm against the loader.
        line = '\t'.join(
            (tweet['SID'], tweet['UID'], tweet_class, tweet['MESSAGE']))
        fp.write(line)

# Apply the classifier for all sms data in the testset
# Judging by the file names, train_path/dev_path still point at Twitter data
# while test_path points at the SMS input file.
train_path = 'Data/tweeti-b.dist.data'
dev_path = 'Data/twitter-dev-gold-B.tsv'
test_path = 'Data/sms-test-input-B.tsv'

# Build a loader over these paths; only its testset is taken here
# (trainset/devset are not re-read from it in this fragment).
semeval = SemevalTwitter(train_path, dev_path, test_path)
testset = semeval.testset

# Open the SMS results file for writing.
# NOTE(review): the code that writes to and closes fp is not visible in this
# fragment -- confirm the handle is closed.
output_file = 'task2-TEAM-B-sms-constrained.output'
fp = open(output_file, 'w')