Example #1
    def predict(self, X_test):
        """Makes predictions for test instances in X_test by majority vote.

        Every entry of ``self.trees`` classifies each instance and the label
        predicted most often is returned for that instance. When several
        labels share the highest count, the one that received its first vote
        earliest wins.

        Args:
            X_test(list of list of obj): The list of testing samples
                The shape of X_test is (n_test_samples, n_features)

        Returns:
            y_predicted(list of obj): The predicted target y values (parallel to X_test)
        """
        if not X_test:
            # Nothing to classify; also avoids indexing X_test[0] below.
            return []

        header = ["att" + str(i) for i in range(len(X_test[0]))]
        predictions = []
        for instance in X_test:
            tree_predictions = {}
            for tree in self.trees:
                # Forest entries are indexed by the "tree" key to get the
                # structure that tdidt_predict walks.
                prediction = myutils.tdidt_predict(header, tree["tree"], instance)
                tree_predictions[prediction] = tree_predictions.get(prediction, 0) + 1

            max_key = max(tree_predictions, key=tree_predictions.get)
            predictions.append(max_key)
        return predictions
 def predict(self, X_test):
     """Makes predictions for test instances in X_test.

     Each tree in ``self.trees`` is paired with the entry of
     ``self.attribute_indexes`` at the same position. The predictions of all
     trees for one instance are combined by majority vote.

     NOTE(review): the statements that filled ``heading``, ``test_sub_set``
     and the per-instance results were missing from this method. They are
     reconstructed here on the assumption that ``self.attribute_indexes[i]``
     lists the columns of the full instance that tree ``i`` was trained on,
     and that the final step is a majority vote -- confirm both.

     Args:
         X_test(list of list of obj): The list of testing samples
             The shape of X_test is (n_test_samples, n_features)

     Returns:
         y_predicted(list of obj): The predicted target y values (parallel to X_test)
     """
     y_predicted = []
     for test in X_test:
         votes = {}
         for i, tree in enumerate(self.trees):
             indexes = self.attribute_indexes[i]
             # Attribute names restart at "att0" for every tree because they
             # are numbered by position within that tree's subset.
             heading = ["att" + str(j) for j in range(len(indexes))]
             test_sub_set = [test[index] for index in indexes]
             label = myutils.tdidt_predict(heading, tree, test_sub_set)
             votes[label] = votes.get(label, 0) + 1
         # On a tie, max() keeps the label that was voted for first.
         y_predicted.append(max(votes, key=votes.get))
     return y_predicted
    def fit(self, X_train, y_train):
        """Fits a forest of TDIDT (top down induction of decision tree) trees to X_train and y_train.

        As far as the visible statements show, the method:
          1. builds default attribute names ("att0", "att1", ...) and a
             per-attribute domain via myutils.unique_index,
          2. stores X_train / y_train on the instance,
          3. splits the data with myevaluation.train_test_split
             (test_size=0.33, shuffle=True),
          4. loops self.N times to build trees,
          5. scores every tree in self.trees on the held-out split,
          6. replaces self.trees with the self.M highest-scoring trees.

        NOTE(review): several statements of this method are missing or cut
        off (each is flagged inline below), so the method does not parse as
        it stands. Restore them from the original file before use.

        Args:
            X_train(list of list of obj): The list of training instances (samples).
                The shape of X_train is (n_train_samples, n_features)
            y_train(list of obj): The target y values (parallel to X_train)
                The shape of y_train is n_train_samples
        """
        header = ['att' + str(i) for i in range(len(X_train[0]))]
        attribute_domains = {}
        for i, val in enumerate(header):
            attribute_domains[val] = myutils.unique_index(X_train, i)

        self.X_train = X_train
        self.y_train = y_train
        sample_X_train, sample_x_test, sample_y_train, sample_y_test = myevaluation.train_test_split(
            X_train, y_train, test_size=0.33, shuffle=True)
        # Training rows with their label appended as the last column.
        train = [
            sample_X_train[i] + [sample_y_train[i]]
            for i in range(len(sample_X_train))
        # NOTE(review): the closing "]" of the list above is missing.

        for _ in range(self.N):
            available_attributes = header.copy()
            # NOTE(review): the start of the next statement is missing; only
            # the tail of a nested call remains. Presumably it builds one tree
            # and appends it to self.trees -- confirm against the original.
                    available_attributes, attribute_domains, header, self.F))

        accuracies = []
        for tree in self.trees:
            header = ['att' + str(i) for i in range(len(sample_x_test[0]))]
            prediction = []
            for row in sample_x_test:
                prediction.append(myutils.tdidt_predict(header, tree, row))
            # Fraction of held-out rows this tree classifies correctly.
            accuracy = 0
            for i in range(len(prediction)):
                if prediction[i] == sample_y_test[i]:
                    accuracy += 1
            accuracy /= len(sample_y_test)
            # NOTE(review): accuracy is never stored in accuracies here; a
            # statement appears to be missing.
        # find m most accurate
        m_trees = []
        for i in range(len(accuracies)):
        # NOTE(review): the body of the loop above is missing. The indexing
        # accuracies[...][1] below implies each entry becomes a pair whose
        # second item is the tree's index in self.trees -- confirm.
        accuracies = sorted(accuracies)
        for i in range(self.M):
            # Walk the ascending sort from the end: best tree first.
            m_trees.append(self.trees[accuracies[-(i + 1)][1]])
        self.trees = m_trees
Example #4
    def fit(self, X_train, y_train, user_F, user_N, user_M, random_state=None):
        """Fits a random forest classifier to X_train and y_train.

        NOTE(review): several statements of this method are missing or cut
        off (each is flagged inline below), so the method does not parse as
        it stands. Restore them from the original file before use.

        Args:
            X_train(list of list of obj): The list of training instances (samples).
                The shape of X_train is (n_train_samples, n_features)
            y_train(list of obj): The target y values (parallel to X_train)
                The shape of y_train is n_train_samples
            user_F: Stored as self.F and passed to myutils.tdidt.
            user_N: Stored as self.N; iteration count of the tree-building loop.
            user_M: Stored as self.M; iteration count of the selection loop.
            random_state: Stored as self.random_state when not None, and passed
                to myevaluation.random_stratified_test_remainder_set.
        """
        if random_state is not None:
            # store seed
            self.random_state = random_state
        self.X_train = X_train
        self.y_train = y_train
        self.F = user_F
        self.N = user_N
        self.M = user_M
        stratified_test, stratified_remainder = myevaluation.random_stratified_test_remainder_set(
            X_train, y_train, random_state)
        train = myutils.stitch_x_and_y_trains(X_train, y_train)
        attribute_domains = myutils.calculate_attribute_domains(
            train)  # TODO: think about if this should be X_train or "train"
        N_forest = []

        for i in range(self.N):
            # NOTE(review): the call below is cut off after its first
            # argument; the remaining argument(s) and ")" are missing.
            bootstrapped_table = myutils.bootstrap(stratified_remainder,
            # NOTE(review): the argument of get_generic_header is missing here.
            available_attributes = myutils.get_generic_header(
            )  # TODO: check that this is used for only X_trains
            tree = myutils.tdidt(bootstrapped_table, available_attributes,
                                 attribute_domains, self.F)
            # NOTE(review): tree is never added to N_forest; a statement
            # appears to be missing.
        header = myutils.get_generic_header(stratified_remainder)
        y_predicted = []
        y_true = []
        all_accuracies = []
        # testing accuracy of N_forest trees to find the top M accuracies
        for tree in N_forest:
            y_predicted_row = []
            for item in stratified_test:
                # NOTE(review): the start of the next statement is missing;
                # presumably it appends this prediction to y_predicted_row.
                # item[:-1] drops the last column of the test row.
                    myutils.tdidt_predict(header, tree, item[:-1]))
            # NOTE(review): y_predicted_row is never added to y_predicted.

        y_true = myutils.get_column(stratified_test, header, "y")
        for predicted_sublist in y_predicted:
            accuracy, _ = myutils.accuracy_errorrate(predicted_sublist, y_true)
            # NOTE(review): accuracy is never stored in all_accuracies.
        for _ in range(self.M):
            max_ind = all_accuracies.index(max(all_accuracies))
            # NOTE(review): nothing is done with N_forest[max_ind] here; the
            # statement that keeps the selected tree appears to be missing.
            # Overwrite with -1 so the same index is not picked again.
            all_accuracies[max_ind] = -1
    def predict(self, X_test):
        """Makes predictions for test instances in X_test.

        Attribute names are generated as "att0", "att1", ... from the width
        of the first test row, and each row is classified by
        myutils.tdidt_predict against ``self.tree``.

        Args:
            X_test(list of list of obj): The list of testing samples
                The shape of X_test is (n_test_samples, n_features)

        Returns:
            y_predicted(list of obj): The predicted target y values (parallel to X_test)
        """
        if not X_test:
            # Nothing to classify; also avoids indexing X_test[0] below.
            return []

        header = ["att" + str(i) for i in range(len(X_test[0]))]
        return [myutils.tdidt_predict(header, self.tree, row) for row in X_test]
 def predict(self, X_test):
     """Makes predictions for test instances in X_test.

     Attribute names are generated as "att0", "att1", ... from the width of
     the first stored training row, and each test row is classified by
     myutils.tdidt_predict against ``self.tree``.

     Args:
         X_test(list of list of obj): The list of testing samples
             The shape of X_test is (n_test_samples, n_features)

     Returns:
         y_predicted(list of obj): The predicted target y values (parallel to X_test)
     """
     # The original computed each name but never stored it, leaving the
     # heading empty; collect the names here.
     heading = ["att" + str(i) for i in range(len(self.X_train[0]))]
     return [myutils.tdidt_predict(heading, self.tree, test) for test in X_test]
Example #7
    def predict(self, X_test):
        """Makes predictions for test instances in X_test.

        Attribute names are generated as "att0", "att1", ... from the width
        of the first test row, and each row is classified by
        myutils.tdidt_predict against ``self.tree``.

        Args:
            X_test(list of list of obj): The list of testing samples
                The shape of X_test is (n_test_samples, n_features)

        Returns:
            y_predicted(list of obj): The predicted target y values (parallel to X_test)
        """
        if not X_test:
            # Nothing to classify; also avoids indexing X_test[0] below.
            return []

        header = ["att" + str(i) for i in range(len(X_test[0]))]
        predictions = []
        for instance in X_test:
            prediction = myutils.tdidt_predict(header, self.tree, instance)
            # The original dropped this result, so it always returned [].
            predictions.append(prediction)
        return predictions
Example #8
    def predict(self, X_test):
        """Makes predictions for test instances in X_test.

        The header handed to myutils.tdidt_predict is produced by
        myutils.get_generic_header from the stored training data stitched
        together with its labels.

        Args:
            X_test(list of list of obj): The list of testing samples
                The shape of X_test is (n_test_samples, n_features)

        Returns:
            y_predicted(list of obj): The predicted target y values (parallel to X_test)
        """
        # NOTE(review): the header is derived from the table that includes the
        # label column; presumably tdidt_predict ignores the extra name -- confirm.
        train = myutils.stitch_x_and_y_trains(self.X_train, self.y_train)
        header = myutils.get_generic_header(train)
        return [myutils.tdidt_predict(header, self.tree, item) for item in X_test]
Example #9
    def predict(self, X_test):
        """Makes predictions for test instances in X_test.

        Attribute names are generated as "att0", "att1", ... from the width
        of the first stored training row, and each test row is classified by
        myutils.tdidt_predict against ``self.tree``.

        Args:
            X_test(list of list of obj): The list of testing samples
                The shape of X_test is (n_test_samples, n_features)

        Returns:
            y_predicted(list of obj): The predicted target y values (parallel to X_test)
        """
        header = ["att" + str(i) for i in range(len(self.X_train[0]))]
        return [myutils.tdidt_predict(header, self.tree, test) for test in X_test]
Example #10
    def predict(self, X_test):
        """Makes predictions for test instances in X_test.

        The header is obtained from myutils.build_header applied to the
        stored training instances, and each test row is classified by
        myutils.tdidt_predict against ``self.tree``.

        Args:
            X_test(list of list of obj): The list of testing samples
                The shape of X_test is (n_test_samples, n_features)

        Returns:
            y_predicted(list of obj): The predicted target y values (parallel to X_test)
        """
        header = myutils.build_header(self.X_train)
        return [
            myutils.tdidt_predict(header, self.tree, instance)
            for instance in X_test
        ]
Example #11
    def fit(self, X_train, y_train, M=7, N=20, F=2):
        """Fits a random forest classifier to X_train and y_train using the TDIDT (top down induction of decision tree) algorithm.

        N trees are grown, each from its own bootstrapped sample of the
        remainder set. Every tree is scored on the remainder rows that its
        sample left out, and the M highest-scoring trees are stored in
        ``self.trees`` as ``{"accuracy": ..., "tree": ...}`` dicts.

        NOTE(review): this method had lost several statements (two loop
        bodies, the validation-set append and the ``trees.append({...})``
        wrapper). They are reconstructed from how the surrounding code uses
        the data -- confirm against the original file.

        Args:
            X_train(list of list of obj): The list of training instances (samples).
                The shape of X_train is (n_train_samples, n_features)
            y_train(list of obj): The target y values (parallel to X_train)
                The shape of y_train is n_train_samples
            M(int): Number of best trees to keep.
            N(int): Number of trees to generate.
            F: Passed through unchanged to myutils.tdidt_random_forest.
        """
        self.X_train = copy.deepcopy(X_train)
        self.y_train = copy.deepcopy(y_train)

        # Split off a test set (the original author describes it as a random
        # stratified 2:1 split; the defaults of train_test_split are not
        # visible here). Copies are passed so the appends below cannot touch
        # the caller's lists.
        X_remainder, X_test, y_remainder, y_test = myevaluation.train_test_split(
            copy.deepcopy(X_train), copy.deepcopy(y_train))

        # Append each label as the last column of its row; the code below
        # relies on that layout (len(row) - 1 features, row[-1] is the label).
        for row, label in zip(X_remainder, y_remainder):
            row.append(label)
        # NOTE(review): X_test is not used again in the visible code.
        for row, label in zip(X_test, y_test):
            row.append(label)

        # generate N random decision trees using bagging
        trees = []
        for _ in range(N):
            sample = myutils.compute_bootstrapped_sample(X_remainder)
            # Rows that the bootstrap did not draw form the validation set.
            validation_set = [row for row in X_remainder if row not in sample]

            n_features = len(sample[0]) - 1
            available_attributes = myutils.get_available_attributes(sample)
            tree = myutils.tdidt_random_forest(
                sample, list(range(n_features)), available_attributes, F)

            # test against the validation set
            header = ["att" + str(j) for j in range(n_features)]
            hits = [
                int(myutils.tdidt_predict(header, tree, row[:-1]) == row[-1])
                for row in validation_set
            ]
            # A tree with no left-out rows cannot be scored; rank it last
            # instead of dividing by zero.
            accuracy = sum(hits) / len(hits) if hits else 0.0
            trees.append({
                "accuracy": accuracy,
                "tree": tree,
            })

        # get the best M of N trees
        trees = sorted(trees, key=lambda k: k["accuracy"], reverse=True)
        self.trees = trees[:M]