def predict(self, X_test):
    """Makes predictions for test instances in X_test.

    Every class score starts at its prior (self.priors[1:], parallel to the
    class labels in self.priors[0]) and is multiplied by the matching entry
    of self.posteriors for each attribute value of the instance. The label
    with the highest score is predicted; ties go to the first such label.

    Args:
        X_test(list of list of obj): The list of testing samples
            The shape of X_test is (n_test_samples, n_features)

    Returns:
        y_predicted(list of obj): The predicted target y values (parallel to X_test)

    Raises:
        ValueError: If an attribute value of a test instance has no
            (remaining) row in the posteriors table.
    """
    if len(X_test) == 0:
        return []

    class_labels = self.priors[0]
    class_priors = self.priors[1:]  # drop the header entry holding the labels
    table_header = self.posteriors[0]

    # The posterior table does not change while predicting, so fetch its
    # columns once instead of once per attribute value and class.
    value_labels = myutils.get_column(self.posteriors, table_header, "label")
    class_columns = [
        myutils.get_column(self.posteriors, table_header, class_label)
        for class_label in class_labels
    ]

    y_predicted = []
    for instance in X_test:
        scores = list(class_priors)
        remaining_labels = list(value_labels)
        for value in instance:
            row_index = remaining_labels.index(str(value))
            for class_index, column in enumerate(class_columns):
                scores[class_index] = scores[class_index] * column[row_index]
            # Mask the consumed row rather than popping it: popping shortens
            # the label list, so later lookups would no longer line up with
            # the rows of the posterior columns.
            # NOTE(review): a value shared by several attributes is still
            # resolved by "first unused row", which depends on the row order
            # produced by fit - confirm against the training code.
            remaining_labels[row_index] = None
        best_index = scores.index(max(scores))
        y_predicted.append(class_labels[best_index])
    return y_predicted
Beispiel #2
0
    def predict(self, X_test):
        """Predicts a class label for every instance in X_test.

        For each class, the score is the class prior multiplied by the
        posterior of every attribute value of the instance that appears in
        the first column of the corresponding table in self.posteriors.
        The label is read from the header row of the first posterior table.

        Args:
            X_test(list of list of obj): The list of testing samples
                The shape of X_test is (n_test_samples, n_features)

        Returns:
            y_predicted(list of obj): The predicted target y values (parallel to X_test)
        """
        y_predicted = []
        for instance in X_test:
            scores = []
            for class_idx, prior in enumerate(self.priors):
                likelihood = 1
                for att_idx, table in enumerate(self.posteriors):
                    # column class_idx + 1 because column 0 holds the attribute values
                    class_column = myutils.get_column(table, class_idx + 1)
                    for row_idx, row in enumerate(table):
                        if row[0] == instance[att_idx]:
                            likelihood = likelihood * class_column[row_idx]
                scores.append(likelihood * prior)
            # first class with the highest score wins
            winner = scores.index(max(scores))
            y_predicted.append(self.posteriors[0][0][winner + 1])

        return y_predicted
Beispiel #3
0
    def fit(self, X_train, y_train):
        """Fits a Naive Bayes classifier to X_train and y_train.

        After fitting:
            * self.priors[k] is the fraction of training labels equal to the
              k-th distinct class (classes are kept in first-seen order).
            * self.posteriors[i] is a table for attribute i. Its first row is
              ["Attributes", class_0, class_1, ...]; every other row is
              [value, p_0, p_1, ...] where p_k is
              (count(value and class_k) / n_rows) / prior_k rounded to 3 places.

        Args:
            X_train(list of list of obj): The list of training instances (samples).
                The shape of X_train is (n_train_samples, n_features)
            y_train(list of obj): The target y values (parallel to X_train)
                The shape of y_train is n_train_samples

        Notes:
            Since Naive Bayes is an eager learning algorithm, this method computes the prior probabilities
                and the posterior probabilities for the training data.
            The model is rebuilt from scratch on every call, so fitting twice
                does not accumulate stale priors or posterior tables.
        """
        self.X_train = X_train
        self.y_train = y_train
        # Start from a clean model; appending to leftovers of an earlier fit
        # would leave duplicated priors and tables behind.
        self.priors = []
        self.posteriors = []

        # distinct class labels in order of first appearance
        classes = []
        for label in y_train:
            if label not in classes:
                classes.append(label)

        # prior of a class = share of the training labels equal to it
        for cls in classes:
            matches = sum(1 for label in y_train if label == cls)
            self.priors.append(matches / len(y_train))

        n_features = len(X_train[0]) if len(X_train) > 0 else 0
        for att_index in range(n_features):
            column = myutils.get_column(X_train, att_index)
            # every table gets its own header row so editing one table
            # cannot silently change the others
            table = [["Attributes"] + classes]
            seen_values = []
            for value in column:
                if value in seen_values:
                    continue
                seen_values.append(value)
                row = [value]
                for class_index, cls in enumerate(classes):
                    joint = sum(
                        1 for cell, label in zip(column, y_train)
                        if cell == value and label == cls
                    )
                    row.append(
                        round((joint / len(column)) / self.priors[class_index], 3))
                table.append(row)
            self.posteriors.append(table)
Beispiel #4
0
    def fit(self, X_train, y_train, user_F, user_N, user_M, random_state=None):
        """Grows user_N trees on bootstrapped samples and keeps the user_M most accurate.

        The data is split into a stratified test set and a remainder. Each
        tree is built from a bootstrap of the remainder, scored on the test
        set, and the user_M best-scoring trees are appended to self.forest.

        Args:
            X_train(list of list of obj): The list of training instances (samples).
                The shape of X_train is (n_train_samples, n_features)
            y_train(list of obj): The target y values (parallel to X_train)
                The shape of y_train is n_train_samples
            user_F: Stored as self.F and forwarded to myutils.tdidt.
            user_N(int): Number of candidate trees to build (stored as self.N).
            user_M(int): Number of trees to keep (stored as self.M).
            random_state: Optional seed; when given it is stored, used to seed
                numpy's global generator, and forwarded to the helpers.
        """
        if random_state is not None:
            # remember the seed and apply it to numpy's global generator
            self.random_state = random_state
            np.random.seed(self.random_state)

        self.X_train = X_train
        self.y_train = y_train
        self.F = user_F
        self.N = user_N
        self.M = user_M

        held_out, remainder = myevaluation.random_stratified_test_remainder_set(
            X_train, y_train, random_state)
        stitched = myutils.stitch_x_and_y_trains(X_train, y_train)
        # TODO: decide whether the domains should come from X_train or the stitched table
        domains = myutils.calculate_attribute_domains(stitched)

        candidates = []
        for _ in range(self.N):
            sample = myutils.bootstrap(remainder, random_state)
            # TODO: check that this header is only used for X_train columns
            attributes = myutils.get_generic_header(sample)
            candidates.append(
                myutils.tdidt(sample, attributes, domains, self.F))

        header = myutils.get_generic_header(remainder)
        header.append("y")

        # one list of predictions per candidate tree; the last cell of each
        # held-out row is dropped before predicting
        per_tree_predictions = [
            [myutils.tdidt_predict(header, tree, row[:-1]) for row in held_out]
            for tree in candidates
        ]

        y_true = myutils.get_column(held_out, header, "y")
        scores = []
        for predicted in per_tree_predictions:
            accuracy, _ = myutils.accuracy_errorrate(predicted, y_true)
            scores.append(accuracy)

        # repeatedly take the current best tree and mark it as used
        for _ in range(self.M):
            best = scores.index(max(scores))
            self.forest.append(candidates[best])
            scores[best] = -1
    def predict(self, xTest):
        """Predicts a label for every instance in xTest by majority vote.

        Every tree in self.bestM predicts all instances; for each instance
        the label predicted by the most trees is returned. When several
        labels are tied, the one voted for by the earliest tree wins, so the
        result is the same on every run.

        Args:
            xTest(list of list of obj): The instances to classify.

        Returns:
            list of obj: One predicted label per instance (parallel to xTest).

        Raises:
            ValueError: If self.bestM contains no trees.
        """
        if not self.bestM:
            raise ValueError("cannot predict: the forest contains no trees")

        per_tree = [tree.predict(xTest) for tree in self.bestM]

        majority_votes = []
        # zip(*per_tree) regroups the predictions instance by instance
        for votes in zip(*per_tree):
            # max() over the votes in tree order keeps the first of any tied
            # labels; iterating a set here would make ties depend on hashing
            majority_votes.append(max(votes, key=votes.count))

        return majority_votes
Beispiel #6
0
    def fit(self, X_train, y_train):
        """Builds a decision tree from X_train and y_train with TDIDT and stores it in self.tree.

        Args:
            X_train(list of list of obj): The list of training instances (samples).
                The shape of X_train is (n_train_samples, n_features)
            y_train(list of obj): The target y values (parallel to X_train)
                The shape of y_train is n_train_samples

        Notes:
            Attributes are named by position ("att0", "att1", ...).
            The domain of each attribute is the values reported by
                myutils.get_frequencies for its column.
            After the tree is built, self.print_decision_rules() is called.
        """
        # default attribute names, one per column of X_train
        header = ["att" + str(col) for col in range(len(X_train[0]))]

        # map every attribute name to the values found in its column
        attribute_domains = {}
        for col, att_name in enumerate(header):
            column = myutils.get_column(X_train, col)
            domain, _ = myutils.get_frequencies(column)
            attribute_domains[att_name] = domain

        # each training row with its class label appended as the last cell
        train = [row + [y_train[idx]] for idx, row in enumerate(X_train)]
        available_attributes = list(header)

        self.tree = myutils.tdidt(train, available_attributes, attribute_domains, header)
        self.print_decision_rules()
 def fit(self, X_train, y_train):
     """Fits a Naive Bayes classifier to X_train and y_train.

     After fitting:
         * self.priors is [class_names, prior_0, prior_1, ...] where each
           prior is the share of training rows in that class.
         * self.posteriors is one flat table. Row 0 is
           ["label", class_0, class_1, ...]; every other row is
           [str(attribute value), p_0, p_1, ...] where p_k is the share of
           class k rows that have this value (0 if none were counted).

     Args:
         X_train(list of list of obj): The list of training instances (samples).
             The shape of X_train is (n_train_samples, n_features)
         y_train(list of obj): The target y values (parallel to X_train)
             The shape of y_train is n_train_samples

     Notes:
         Since Naive Bayes is an eager learning algorithm, this method computes the prior probabilities
             and the posterior probabilities for the training data.
         X_train is not modified; the labelled table is built from new rows.
     """
     self.X_train = X_train
     self.y_train = y_train
     self.priors = []
     self.posteriors = []

     # Build new rows with the label appended. A shallow copy of X_train
     # would share its row lists, so appending to them would grow the
     # caller's data on every fit.
     table = [list(row) + [y_train[idx]] for idx, row in enumerate(X_train)]

     # columns are named "1" .. "n"; the last one holds the class label
     n_columns = len(table[0])
     header = [str(col + 1) for col in range(n_columns)]

     class_names, class_subtables = myutils.group_by(
         table, header, str(n_columns))
     self.priors.append(class_names)
     for subtable in class_subtables:
         self.priors.append(len(subtable) / len(table))

     # Row labels of the posterior table: the values of every attribute,
     # walking the attribute columns from last to first. The column count
     # comes from the first row (not from a leftover loop index).
     value_labels = []
     for att in range(n_columns - 1, 0, -1):
         att_values, _ = myutils.group_by(table, header, str(att))
         value_labels.extend(att_values)

     # header row followed by one zero-filled row per attribute value
     self.posteriors.append(["label"] + list(class_names))
     for value in value_labels:
         self.posteriors.append([str(value)] + [0] * len(class_names))

     for class_index, subtable in enumerate(class_subtables):
         # working copy of the row labels; rows already filled for this
         # class are overwritten with 0 so a repeated value string moves on
         # to its next row
         pending_labels = myutils.get_column(self.posteriors,
                                             self.posteriors[0],
                                             self.posteriors[0][0])
         for att_name in header[:-1]:
             column = myutils.get_column(subtable, header, att_name)
             values, counts = myutils.get_frequencies(column)
             for value, count in zip(values, counts):
                 row_index = pending_labels.index(str(value))
                 pending_labels[row_index] = 0
                 # class k lives in column k + 1 (column 0 is the row label)
                 self.posteriors[row_index][class_index + 1] = count / len(
                     subtable)
Beispiel #8
0
def predict():
    """Predicts a music genre from audio features passed as query parameters.

    Reads sadness, feelings, danceability, loudness, acousticness,
    instrumentalness, valence and energy from request.args (default 5 when
    a parameter is absent), fits a kNN classifier on "tcc_ceds_music.csv"
    and returns the prediction as JSON with status 200. If fitting input or
    prediction fails, a randomly chosen genre is returned instead.

    Other lyric-topic features (dating, violence, world_life, ...) and age
    are currently not used.

    Returns:
        tuple: (JSON response of the form {"prediction": ...}, 200)
    """
    sadness = request.args.get("sadness", 5)
    feelings = request.args.get("feelings", 5)
    danceability = request.args.get("danceability", 5)
    loudness = request.args.get("loudness", 5)
    # The historical, misspelt key "accousticness" is still accepted so
    # existing clients keep working; the correct spelling takes precedence.
    acousticness = request.args.get(
        "acousticness", request.args.get("accousticness", 5))
    instrumentalness = request.args.get("instrumentalness", 5)
    valence = request.args.get("valence", 5)
    energy = request.args.get("energy", 5)
    # NOTE(review): supplied query values are presumably strings while the
    # default is the int 5 - confirm what the classifier expects.

    # get data to fit
    table = mpt.MyPyTable().load_from_file("tcc_ceds_music.csv")

    new_table = myutils.get_even_classifier_instances(table)
    # labels are read before the feature columns are categorized
    genre_col = myutils.get_column(new_table.data, new_table.column_names,
                                   "genre")
    new_table = myutils.categorize_values(new_table)

    feature_names = [
        "sadness", "feelings", "danceability", "loudness", "acousticness",
        "instrumentalness", "valence", "energy"
    ]
    X = [new_table.get_column(name) for name in feature_names]
    X = myutils.transpose(X)  # columns -> one row per song

    # create knn classifier
    knn_classifier = MyKNeighborsClassifier()
    knn_classifier.fit(X, genre_col)
    try:
        print("sadness:", sadness)
        prediction = knn_classifier.predict([[
            sadness, feelings, danceability, loudness, acousticness,
            instrumentalness, valence, energy
        ]])
        print(prediction)
    except Exception as err:  # best effort: fall back to a random genre below
        print("feelings:", feelings)
        print("in except block:", repr(err))
        prediction = None

    if prediction is not None:
        result = {"prediction": prediction}
        return jsonify(result), 200

    # NOTE(review): "raggae" looks like a typo for "reggae"; left unchanged
    # because clients may compare against this exact string - confirm.
    results_array = [
        "pop", "hip hop", "rock", "blues", "country", "jazz", "raggae"
    ]
    # random.choice never indexes past the end, unlike
    # randint(0, len(results_array)) whose upper bound is inclusive
    result = {"prediction": random.choice(results_array)}
    return jsonify(result), 200
Beispiel #9
0
    def fit(self, X_train, y_train):
        """Builds self.N bootstrapped trees and keeps the self.M most accurate ones.

        Each round draws a bootstrap sample of the labelled training rows,
        grows a tree on it with myutils.tdidt_forest, and scores the tree on
        the rows that are not in the sample. The trees with the highest
        scores are appended to self.trees.

        Args:
            X_train(list of list of obj): The list of training instances (samples).
                The shape of X_train is (n_train_samples, n_features)
            y_train(list of obj): The target y values (parallel to X_train)
                The shape of y_train is n_train_samples

        Notes:
            Attributes are named by position ("att0", "att1", ...).
            When self.seed is set it seeds the random module first.
        """
        if self.seed is not None:
            random.seed(self.seed)

        candidates = []
        for _ in range(self.N):
            # default attribute names, one per column of X_train
            header = ["att" + str(col) for col in range(len(X_train[0]))]

            # map every attribute name to the values found in its column
            attribute_domains = {}
            for col, att_name in enumerate(header):
                column = myutils.get_column(X_train, col)
                domain, _ = myutils.get_frequencies(column)
                attribute_domains[att_name] = domain

            # fresh labelled rows every round: the validation rows below
            # have their label popped off in place
            train = [row + [y_train[idx]] for idx, row in enumerate(X_train)]
            available_attributes = list(header)

            boot_train = myutils.compute_bootstrapped_sample(train)

            # rows that did not make it into the bootstrap sample
            validation_set = [row for row in train if row not in boot_train]

            tree = myutils.tdidt_forest(boot_train, available_attributes, attribute_domains, header, self.F)

            # strip the label from every validation row and keep it as truth
            y_test = [row.pop() for row in validation_set]
            y_predict = myutils.predict_tree(validation_set, tree)

            candidates.append({
                "tree": tree,
                "acc": myutils.get_accuracy(y_predict, y_test),
            })

        # most accurate first; equal scores keep their build order
        candidates.sort(key=lambda entry: entry["acc"], reverse=True)
        for rank in range(self.M):
            self.trees.append(candidates[rank]["tree"])