("won", "happy"), ("beautiful", "happy"), ("amazing", "happy"), ("good", "happy"), ("great", "happy"), ("lovely", "happy"), ("feeling awesome", "happy"), ("pleasure", "happy"), ("bliss", "happy"), ("delight", "happy"), ("enjoy", "happy"), ("joy", "happy"), ("cheerful", "happy"), ("laugh", "happy"), ("well-being", "happy"), ("prosperity", "happy"), ("cheer", "happy"), ("ecstacy", "happy"), ("rejoice", "happy"), ("unhappy", "sad"), ("depression", "sad"), ("displeasure", "sad"), ("trouble", "sad"), ("worry", "sad"), ("upset", "sad"), ("sad", "sad"), ("misery", "sad"), ("pain", "sad"), ("sorry", "sad"), ("trouble", "sad"), ("broke up", "sad"), ("sorry", "sad"), ("bad", "sad"), ("failed", "sad"), ("broke", "sad"), ("kicked", "sad"), ("not going well", "sad")] model = nbc(test_corpus) #happy def happy(): x = random.randint(0, len(happy_list) - 1) reply = happy_list[x] engine.say(reply) engine.runAndWait() #sad def sad():
        train.append((summary, 'unhelpful'))
    except Exception as ex:
        print(type(ex))
        print(ex.args)
        print(ex)
        errorTraining += 1
        # Skip ahead to the blank line that separates review records.
        while not (reviewsFile.readline() in ["\n", '\r\n']):
            pass
    finally:
        reviewCount += 1
        if reviewCount % checkNum == 0:
            print("Still here at review", reviewCount)

print("starting textblob training")
cl = nbc(train)
print("\nTraining Done")
print("\nBeginning Classification\n")

while reviewCount <= 10000:
    try:
        line = reviewsFile.readline()
        if not line:
            break
        # Strip the fixed-length field prefixes (e.g. "product/productId: ")
        # and the trailing newline from each record line.
        productId = line[19:-1]
        userId = reviewsFile.readline()[15:-1]
        profileName = reviewsFile.readline()[20:-1]
        # Helpfulness is stored as "upvotes/totalVotes".
        temp = reviewsFile.readline()[20:-1].split("/")
        helpfulRatings = int(temp[0])
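# Once trained, a TextBlob classifier can also report per-label
# probabilities rather than just the winning label. A minimal sketch,
# assuming summary holds one review-summary string and that a 'helpful'
# label was appended to train alongside the 'unhelpful' one seen above:
#
#     dist = cl.prob_classify(summary)
#     print(dist.max(), dist.prob("helpful"), dist.prob("unhelpful"))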
# Prepare the training and testing datasets (an 80:20 split in our case).
path = "/home/ubuntu/SentenceCorpus/labeled_articles"
text_files = txt_file(path)
training_data = text_files[0:round(len(text_files) * .8)]
test_data = text_files[round(len(text_files) * .8):]

# Write the training data into a JSON file.
label, sentence = txt_to_sentence(training_data)
data_t = []
for i in range(len(sentence)):
    data = {'label': label[i], "text": sentence[i]}
    data_t.append(data)
f_name = "/home/ubuntu/SentenceCorpus" + "/" + "training" + ".json"
with open(f_name, 'w') as f:
    json.dump(data_t, f, ensure_ascii=True)

# Train a Naive Bayes classifier for sentence classification.
with open(f_name, 'r') as f:
    nb = nbc(f, format="json")

# Validate against the held-out test dataset.
test_l, test_s = txt_to_sentence(test_data)
counter = 0
for i in range(len(test_s)):
    predicted_label = nb.classify(test_s[i]).upper()
    original_label = test_l[i].upper()
    if predicted_label == original_label:
        counter += 1
print("Accuracy: " + str(round(counter * 100 / len(test_s), 2)) + "%")
print("cheers")
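# TextBlob also ships a built-in accuracy helper that performs the same
# count. An equivalent check, assuming each test sentence is paired with
# its label (note: the manual loop above uppercases both sides before
# comparing, which accuracy() does not do):
#
#     print("Accuracy:", nb.accuracy(list(zip(test_s, test_l))))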