Example #1
0
 def predict(self, y_test):
     """Predict the majority class label for every test instance.

     Args:
         y_test(list of obj): the test instances (only its length is used).

     Returns:
         list of obj: the most frequent label in self.y_train, repeated
             once per test instance.
     """
     # NOTE(review): assumes myutils.get_freq_str returns parallel
     # (values, counts) lists — confirm against myutils
     vals, counts = myutils.get_freq_str(self.y_train)
     # majority label = the value with the highest count (first max on ties)
     majority_label = vals[counts.index(max(counts))]
     # every test instance gets the same prediction
     return [majority_label] * len(y_test)
Example #2
0
    def predict(self):
        """Randomly predict a class label, weighted by training frequency.

        Returns:
            obj: one label drawn from the distinct y_train labels with
                probability proportional to each label's frequency.
        """
        vals, counts = myutils.get_freq_str(self.y_train)
        total = len(self.y_train)
        # relative frequency of each label is its sampling weight
        p_list = [count / total for count in counts]
        # NOTE(review): np.random.choice is unseeded here, so predictions
        # are nondeterministic — confirm that is intended
        return np.random.choice(vals, p=p_list)
Example #3
0
    def fit(self, X_train, y_train):
        """Fits a Naive Bayes classifier to X_train and y_train.

        Args:
            X_train(list of list of obj): The list of training instances (samples).
                The shape of X_train is (n_train_samples, n_features)
            y_train(list of obj): The target y values (parallel to X_train)
                The shape of y_train is n_train_samples

        Notes:
            Since Naive Bayes is an eager learning algorithm, this method
            computes the prior and posterior probabilities for the training
            data and stores them as:
                self.priors: [[class_label, P(C)], ...]
                self.posteriors: one entry per attribute column, shaped
                    [col_index, [class_label, [value, P(V|C)], ...], ...]
            P(V|C) is computed as (#rows with V and C / n) / P(C), which is
            only valid for categorical attributes.
        """
        self.posteriors = []
        n = len(y_train)

        # get each distinct class label with its count; int labels use the
        # numeric frequency helper, everything else the string one
        if isinstance(y_train[0], int):
            c_list, counts = myutils.get_freq_1col(y_train)
        else:
            c_list, counts = myutils.get_freq_str(y_train)

        # priors: [class_label, P(C)] for each class
        self.priors = [[c, count / n] for c, count in zip(c_list, counts)]

        # posteriors: P(V|C) for every (value, class) pair, per attribute column
        for i in range(len(X_train[0])):
            col = myutils.get_col_byindex(X_train, i)
            # TODO: handle continuous attributes — this treats every column
            # as categorical
            val_list, _ = myutils.get_freq_str(col)

            # [col_index, [class_label, [value, P(V|C)], ...], ...]
            col_posteriors = [i]
            for c_index, c_label in enumerate(c_list):
                # P(V|C) entries for this class, headed by the class label
                posteriors = [c_label]
                for V in val_list:
                    # count rows where attribute == V and label == C
                    # (string-compare so mixed int/str columns still match)
                    count = sum(
                        1
                        for j in range(len(X_train))
                        if str(X_train[j][i]) == str(V)
                        and str(y_train[j]) == str(c_label)
                    )
                    # P(V|C) = P(V and C) / P(C)
                    p = (count / n) / self.priors[c_index][1]
                    posteriors.append([V, p])
                col_posteriors.append(posteriors)
            self.posteriors.append(col_posteriors)
 def predict(self):
     """Return the single most frequent class label in the training data."""
     # pair each label with its count and keep the label whose count is
     # largest; max() returns the first maximal pair, matching the original
     # counts.index(max(counts)) tie-breaking
     vals, counts = myutils.get_freq_str(self.y_train)
     best_label, _ = max(zip(vals, counts), key=lambda pair: pair[1])
     return best_label