Example #1
               ("won", "happy"), ("beautiful", "happy"), ("amazing", "happy"),
               ("good", "happy"), ("great", "happy"), ("lovely", "happy"),
               ("feeling awesome", "happy"), ("pleasure", "happy"),
               ("bliss", "happy"), ("delight", "happy"), ("enjoy", "happy"),
               ("joy", "happy"), ("cheerful", "happy"), ("laugh", "happy"),
               ("well-being", "happy"), ("prosperity", "happy"),
               ("cheer", "happy"), ("ecstacy", "happy"), ("rejoice", "happy"),
               ("unhappy", "sad"), ("depression", "sad"),
               ("displeasure", "sad"), ("trouble", "sad"), ("worry", "sad"),
               ("upset", "sad"), ("sad", "sad"), ("misery", "sad"),
               ("pain", "sad"), ("sorry", "sad"), ("trouble", "sad"),
               ("broke up", "sad"), ("sorry", "sad"), ("bad", "sad"),
               ("failed", "sad"), ("broke", "sad"), ("kicked", "sad"),
               ("not going well", "sad")]

model = nbc(test_corpus)
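
# Quick sanity check of the trained model. A sketch, assuming nbc is TextBlob's
# NaiveBayesClassifier: classify() returns the most likely label and
# prob_classify() returns a probability distribution over the labels.
print(model.classify("I failed my exam"))               # expected: "sad"
print(model.prob_classify("what a lovely day").max())   # expected: "happy"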

# happy: speak a random upbeat reply from happy_list


def happy():
    x = random.randint(0, len(happy_list) - 1)
    reply = happy_list[x]
    engine.say(reply)
    engine.runAndWait()


# sad: speak a random downbeat reply from sad_list


def sad():
    # Mirrors happy(), assuming a sad_list of canned replies defined
    # alongside happy_list
    x = random.randint(0, len(sad_list) - 1)
    reply = sad_list[x]
    engine.say(reply)
    engine.runAndWait()
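
# A sketch of how the classifier and the two reply functions might be wired
# together (respond() and user_input are illustrative, not part of the
# original bot):
def respond(user_input):
    mood = model.classify(user_input)
    if mood == "happy":
        happy()
    else:
        sad()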

Example #2

            train.append((summary, 'unhelpful'))

    except Exception as ex:
        # On a malformed review, log the error and skip ahead to the blank
        # line that separates records
        print(type(ex))
        print(ex.args)
        print(ex)
        errorTraining += 1
        while not (reviewsFile.readline() in ["\n", '\r\n']):
            pass
    finally:
        reviewCount += 1
        if reviewCount % checkNum == 0:
            print("Still here at review", reviewCount)

print "starting textblob training"
cl = nbc(train)
print "\nTraining Done"
print "\nBeginning Classification\n"

while reviewCount <= 10000:
    try:
        line = reviewsFile.readline()
        if not line:
            break
        productId = line[19:-1]

        userId = reviewsFile.readline()[15:-1]
        profileName = reviewsFile.readline()[20:-1]

        temp = reviewsFile.readline()[20:-1].split("/")
        helpfulRatings = int(temp[0])
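
# Self-contained sketch of the same TextBlob workflow used in this example
# (train on (summary, label) pairs, then inspect and evaluate the model);
# the data below is purely illustrative:
from textblob.classifiers import NaiveBayesClassifier

toy_train = [("thorough review with pros and cons", "helpful"),
             ("detailed comparison and clear photos", "helpful"),
             ("ok", "unhelpful"),
             ("did not even open the package", "unhelpful")]
toy_test = [("clear pros and cons", "helpful"), ("meh", "unhelpful")]

toy_cl = NaiveBayesClassifier(toy_train)
print(toy_cl.classify("very thorough and detailed"))  # predicted label
print(toy_cl.accuracy(toy_test))                      # fraction correct
toy_cl.show_informative_features(5)                   # most telling features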

Example #3

# Prepare training and testing datasets (an 80/20 split in our case)
path = "/home/ubuntu/SentenceCorpus/labeled_articles"
text_files = txt_file(path)
training_data = text_files[0:round(len(text_files) * .8)]
test_data = text_files[round(len(text_files) * .8):]
# Write the training data into a JSON file
label, sentence = txt_to_sentence(training_data)
data_t = []
for i in range(len(sentence)):
    data = {'label': label[i], "text": sentence[i]}
    data_t.append(data)
f_name = "/home/ubuntu/SentenceCorpus/training.json"
with open(f_name, 'w') as f:
    json.dump(data_t, f, ensure_ascii=True)
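
# For reference, TextBlob's "json" format expects a list of records with
# "text" and "label" fields, so the file written above looks roughly like
# (values illustrative):
#   [{"label": "...", "text": "..."}, {"label": "...", "text": "..."}, ...]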

# Train a Naive Bayes classifier for sentence classification
with open(f_name, 'r') as f:
    nb = nbc(f, format="json")

# Validate using the test dataset
test_l, test_s = txt_to_sentence(test_data)
counter = 0
for i in range(len(test_s)):
    predicted_label = nb.classify(test_s[i]).upper()
    original_label = (test_l[i]).upper()
    if (predicted_label == original_label):
        counter += 1

print("Accuracy: " + str(round(counter * 100 / len(test_s), 2)) + "%")
print("cheers")