# Train a sentiment classifier on the labelled tweets and run it on test text.
#
# NOTE(review): this section was reconstructed from a version in which every
# statement had been collapsed onto a single line starting with `#`, which
# turned the whole script into one comment. It relies on `nltk`, `helper` and
# `tweet` being defined earlier in the file -- confirm.

# helper.filtered_tweet is the training data: per the original note, the
# tweets paired with their targets, taken from tweet.dat.
training_set = nltk.classify.apply_features(
    helper.extract_features, helper.filtered_tweet
)

# Start training; this produces the model.
classifier = nltk.NaiveBayesClassifier.train(training_set)
# Alternative kept from the original: train a Maxent model and save it.
# classifier = nltk.MaxentClassifier.train(training_set)
# helper.save_model('maxent.mdl', classifier)

# Per the original note, this just prints the contents of question.txt.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(helper.question_text)

# Alternative kept from the original: load previously saved models.
# classifier = helper.load_model('naivebayes.mdl')
# classifier2 = helper.load_model('sentiment-maxent.mdl')

# Split the sentence into words.
# NOTE(review): `tweet` is not defined in this section -- presumably it is
# set earlier in the file; verify before running.
print(helper.replace_two_or_more_liat(tweet.split()))

# Corpus = test data.
corpus_tag = helper.get_tag_from_corpus("tweets.dat")
corpus_text = helper.get_text_from_corpus("tweets.dat")
test_tag = []

# For testing: replace the corpus text with one hand-written sentence.
# NOTE(review): this discards the text just loaded from tweets.dat, so
# test_tag and corpus_tag no longer have matching lengths -- confirm intent.
corpus_text = ["Aku ga suka sama kamu"]

for text in corpus_text:
    # Classify this corpus text using its whitespace-split tokens.
    result = classifier.classify(helper.extract_features(text.split()))
    print(result)
    # Alternative kept from the original: second-stage classification.
    # if result == '1':
    #     result = classifier2.classify(helper.extract_features(text.split()))
    test_tag.append(result)

# Reverse the corpus tags in place.
# NOTE(review): placed after the loop; with the single-element corpus_text
# above this is equivalent to running it inside the loop -- confirm intent.
corpus_tag.reverse()