def fit(self, cleaned_data, terms, target, stopwords, idf, weight=None):
        """Train the multinomial Naive Bayes model on TF-IDF-weighted terms.

        Parameters:
            cleaned_data: preprocessed documents, one entry per document.
            terms: vocabulary of terms used as features.
            target: per-document class labels ('Negatif'/'Netral'/'Positif').
            stopwords: stopword list, stored on the instance for later use.
            idf: precomputed idf values; only stored when `weight` is given.
            weight: optional precomputed TF-IDF weights keyed by term. When
                omitted, TF-IDF is recomputed from cleaned_data/terms and
                `idf` is ignored. NOTE(review): in that branch self.idf is
                never set — confirm callers always pass `weight` when they
                later read self.idf.
        """
        self.cleaned_data = cleaned_data
        self.terms = terms
        self.target = target
        if weight is None:  # identity test, not `== None` (PEP 8)
            weighting = Weighting(self.cleaned_data, self.terms)
            self.weighted_terms = weighting.get_tf_idf_weighting()
        else:
            self.weighted_terms = weight
            self.idf = idf
        self.stopwords = stopwords

        # Per-document total weight: sum of the document's weight across
        # every term in the vocabulary.
        for i in range(len(self.cleaned_data)):
            self.total.append(
                sum(self.weighted_terms[term][i] for term in self.terms))

        # Conditional probabilities P(term | class), one list per class,
        # aligned index-for-index with self.terms.
        for term in self.terms:
            self.con_prob_negative.append(
                self.calculate_probability_multinomial(term, 'Negatif'))
            self.con_prob_neutral.append(
                self.calculate_probability_multinomial(term, 'Netral'))
            self.con_prob_positive.append(
                self.calculate_probability_multinomial(term, 'Positif'))

        # Likelihood table: term -> [P(t|Negatif), P(t|Netral), P(t|Positif)].
        self.likelihood = {
            term: [neg, neu, pos]
            for term, neg, neu, pos in zip(self.terms,
                                           self.con_prob_negative,
                                           self.con_prob_neutral,
                                           self.con_prob_positive)
        }

        # Priors P(class) = documents in class / total documents.
        self.prior_negative = self.getTotalDocumentWithSpecificCategory(
            'Negatif') / self.getTotalDocument()
        self.prior_neutral = self.getTotalDocumentWithSpecificCategory(
            'Netral') / self.getTotalDocument()
        self.prior_positive = self.getTotalDocumentWithSpecificCategory(
            'Positif') / self.getTotalDocument()
# Example #2
# NOTE(review): `result_data_file` was opened before this chunk; finish
# reading and closing it here. SECURITY: pickle.load executes arbitrary
# code on malicious input — only load trusted, locally produced artifacts.
new_cleaned_data = pickle.load(result_data_file)
result_data_file.close()

# Load the remaining pickled artifacts. `with` guarantees each file is
# closed even if unpickling raises, replacing the open/load/close triples.
with open(nt_filename, 'rb') as result_data_file:
    new_terms = pickle.load(result_data_file)

with open(rw_filename, 'rb') as result_data_file:
    removed_words = pickle.load(result_data_file)

with open(stopwords_filename, 'rb') as result_data_file:
    stopwords = pickle.load(result_data_file)

# Compute TF-IDF weights and idf once, then fit the classifier on them.
weight = Weighting(new_cleaned_data, new_terms)
tfidf = weight.get_tf_idf_weighting()
idf = weight.get_idf()

nb = NBMultinomial()
nb.fit(new_cleaned_data, new_terms, data_train[i]["target"], stopwords, idf,
       tfidf)


def predict(input_tweet):
    prediction, negatif, netral, positif, used_terms = nb.predict(input_tweet)
    result_dict = {}
    result_dict["success"] = True
    result_dict["type"] = "TBRS"
    result_dict["data"] = {
        "prediction": prediction,