def train_test_split(df):
    """Shuffle df's rows and split them 60/40 into (train, test) frames.

    NOTE(review): the original chunk began mid-function — the `def` line and
    the creation of `index` were cut off. They are reconstructed here from
    the call site `train_test_split(df)` below; confirm against the full file.
    """
    index = np.arange(df.shape[0])  # assumes a default RangeIndex — TODO confirm
    np.random.shuffle(index)
    # Original bound the row count to `len`, shadowing the builtin — renamed.
    n_rows, _ = df.values.shape
    train_size = int(n_rows * 0.6)  # 60% of rows go to the training split
    shuffled = df.loc[index]
    train_data = shuffled[0:train_size]
    test_data = shuffled[train_size:]
    return train_data, test_data


# Load iris and keep only the two features used for this question.
df = pd.read_csv("iris.data",
                 names=['sepal_length', 'sepal_width', 'petal_length',
                        'petal_width', 'label'])
df = df[['sepal_width', 'petal_width', 'label']]
# Binary task: Iris-virginica (1) vs. everything else (0).
df = df.replace({'label': {'Iris-setosa': 0,
                           'Iris-versicolor': 0,
                           'Iris-virginica': 1}})

train_data, test_data = train_test_split(df)
X = train_data[['sepal_width', 'petal_width']]
y = train_data[['label']]['label']

# `criteria` and `n_estimators` are defined earlier in the file (outside
# this chunk). max_depth=1 makes each base learner a decision stump.
tree_iris = DecisionTree(criterion=criteria, max_depth=1)
Classifier_AB_iris = AdaBoostClassifier(base_estimator=tree_iris,
                                        n_estimators=n_estimators)
Classifier_AB_iris.fit(X, y)
y_hat = Classifier_AB_iris.predict(X)
# [fig1, fig2] = Classifier_AB.plot()
print('Criteria :', criteria)
print('Accuracy: ', accuracy(y_hat, y))
print()
for cls in y.unique():
    if cls == 1:
        print('Category: Iris-virginica')
    else:
        print('Category: Not Iris-virginica')
    print('Precision: ', precision(y_hat, y, cls))
    print('Recall: ', recall(y_hat, y, cls))
    print()
import csv

np.random.seed(42)

########### AdaBoostClassifier on Real Input and Discrete Output ###################
N = 30
P = 2
NUM_OP_CLASSES = 2
n_estimators = 3

# Random non-negative features and random binary class labels.
X = pd.DataFrame(np.abs(np.random.randn(N, P)))
y = pd.Series(np.random.randint(NUM_OP_CLASSES, size=N), dtype="category")
criteria = 'information_gain'

# Depth-1 sklearn tree (a decision stump) as the weak learner.
Dtree = tree.DecisionTreeClassifier(criterion='entropy', max_depth=1)
Classifier_AB = AdaBoostClassifier(base_estimator=Dtree, n_estimators=n_estimators)
Classifier_AB.fit(X, y)
y_hat = Classifier_AB.predict(X)
[fig1, fig2] = Classifier_AB.plot()

print('Criteria :', criteria)
print('Accuracy: ', accuracy(y_hat, y))
for cls in y.unique():
    print('Precision: ', precision(y_hat, y, cls))
    print('Recall: ', recall(y_hat, y, cls))


##### AdaBoostClassifier on Iris data set using the entire data set with sepal width and petal width as the two features
def load_iris():
    X = []
    y = []
    # NOTE(review): the chunk ends here — the remainder of load_iris
    # (reading the file, returning X/y) is outside the visible region.
np.random.seed(42)

########### AdaBoostClassifier on Real Input and Discrete Output ###################
N = 30
P = 2
NUM_OP_CLASSES = 2
n_estimators = 3

# Random non-negative real features, random binary categorical targets.
X = pd.DataFrame(np.abs(np.random.randn(N, P)))
y = pd.Series(np.random.randint(NUM_OP_CLASSES, size=N), dtype="category")
criteria = 'information_gain'

# tree = DecisionTree(criterion=criteria)
# NOTE(review): base_estimator is the *string* 'tree', not an estimator
# instance — presumably the project AdaBoostClassifier builds its own weak
# learners in that case; confirm, otherwise uncomment the line above.
Classifier_AB = AdaBoostClassifier(base_estimator='tree',
                                   n_estimators=n_estimators)
Classifier_AB.fit(X, y)
y_hat = Classifier_AB.predict(X)
# [fig1, fig2] = Classifier_AB.plot()

print('Criteria :', criteria)
print('Accuracy: ', accuracy(y_hat, y))
for cls in y.unique():
    print('Precision for', cls, ' : ', precision(y_hat, y, cls))
    print('Recall for ', cls, ': ', recall(y_hat, y, cls))

##### AdaBoostClassifier on Iris data set using the entire data set with sepal width and petal width as the two features
dataset = load_iris()
X, y = dataset.data, dataset.target
X = pd.DataFrame(X)
# y_hat = Classifier_AB.predict(X)
# # [fig1, fig2] = Classifier_AB.plot()
# print('Criteria :', criteria)
# print('Accuracy: ', accuracy(y_hat, y))
# for cls in np.unique(y):
#     print('Precision of {} is: '.format(cls), precision(y_hat, y, cls))
#     print('Recall of {} is: '.format(cls), recall(y_hat, y, cls))
# del(Classifier_AB)

##### AdaBoostClassifier on Iris data set using the entire data set with sepal width and petal width as the two features
da = pd.read_csv('iris.csv')
col1 = da["sepal_width"]
col2 = da["petal_width"]

# Binary labels in {-1, +1}: virginica vs. the rest.
label = np.array(da["species"])
label = np.where(label == "virginica", 1, -1)

# Join the two feature columns on their (shared) index, attach the labels.
iris = pd.merge(col1, col2, left_index=True, right_index=True)
iris["Truth"] = label

# Shuffle, then take the first 60% of rows for training.
iris = iris.sample(frac=1).reset_index(drop=True)
split_at = int(0.6 * (iris.shape[0]))
X_train = iris.iloc[:split_at, :-1]
y_train = iris.iloc[:split_at, -1]
X_test = iris.iloc[split_at:, :-1]
y_test = iris.iloc[split_at:, -1]

Classifier_AB1 = AdaBoostClassifier(n_estimators=3)
Classifier_AB1.fit(X_train, y_train)
y_hat = Classifier_AB1.predict(X_test)
print(list(y_hat), list(y_test))
print("Accuracy: ", accuracy(y_hat, y_test))
Classifier_AB1.plot(X_test)
# NOTE(review): this chunk starts mid-script — X, y, tree, criteria and
# n_estimators are defined above the visible region.

# Fit the single weak learner once with uniform sample weights.
# (Original name `re` shadowed the stdlib `re` module — renamed.)
n_samples = X.shape[0]
init_weights = [1 / n_samples] * n_samples
tree.fit(X, y, init_weights)
yhat = pd.Series(tree.predict(X))

print('Criteria :', criteria)
print('Accuracy: ', accuracy(yhat, y))
for cls in y.unique():
    print("***Class :" + str(cls) + "***")
    print('Precision: ', precision(yhat, y, cls))
    print('Recall: ', recall(yhat, y, cls))

print("-----------------------------------------------------------")
print("Adaboost on random data")
print("-----------------------------------------------------------")
Classifier_AB = AdaBoostClassifier(base_estimator=tree, n_estimators=n_estimators)
Classifier_AB.fit(X, y)
y_hat = Classifier_AB.predict(X)
fig1, fig2 = Classifier_AB.plot()

print('Criteria :', criteria)
print('Accuracy: ', accuracy(y_hat, y))
for cls in y.unique():
    print("***Class :" + str(cls) + "***")
    print('Precision: ', precision(y_hat, y, cls))
    print('Recall: ', recall(y_hat, y, cls))

##### AdaBoostClassifier on Iris data set using the entire data set with sepal width and petal width as the two features
X = pd.read_csv("iris.data")
# a = []
for i in range(5):
    # NOTE(review): the loop body is cut off at the chunk boundary; `pass`
    # added only to keep the reconstructed fragment parseable.
    pass
# Or you could import sklearn DecisionTree
from linearRegression.linearRegression import LinearRegression

np.random.seed(42)

########### AdaBoostClassifier on Real Input and Discrete Output ###################
N = 30
P = 2
NUM_OP_CLASSES = 2
n_estimators = 3

# Non-negative random features and random binary categorical labels.
X = pd.DataFrame(np.abs(np.random.randn(N, P)))
y = pd.Series(np.random.randint(NUM_OP_CLASSES, size=N), dtype="category")
criteria = 'information_gain'

# Depth-1 project DecisionTree = decision stump weak learner.
tree = DecisionTree(criterion=criteria, max_depth=1)
Classifier_AB = AdaBoostClassifier(base_estimator=tree, n_estimators=n_estimators)
Classifier_AB.fit(X, y)
y_hat = Classifier_AB.predict(X)
Classifier_AB.plot()
# #[fig1, fig2] = Classifier_AB.plot()

print('Criteria :', criteria)
print('Accuracy: ', accuracy(y_hat, y))
print()
for cls in y.unique():
    print('Category: ', cls)
    print('Precision: ', precision(y_hat, y, cls))
    print('Recall: ', recall(y_hat, y, cls))
    print()
from linearRegression.linearRegression import LinearRegression

np.random.seed(42)

########### AdaBoostClassifier on Real Input and Discrete Output ###################
N = 30
P = 2
NUM_OP_CLASSES = 2
n_estimators = 3

X = pd.DataFrame(np.abs(np.random.randn(N, P)))
y = pd.Series(np.random.randint(NUM_OP_CLASSES, size=N), dtype="category")
criteria = 'information_gain'

# NOTE(review): `tree` is not defined in this chunk — presumably created or
# imported above the visible region; confirm against the full file.
Classifier_AB = AdaBoostClassifier(base_estimator=tree,
                                   n_estimators=n_estimators)
y = y.replace(0, -1)  # relabel to {-1, +1} for AdaBoost
Classifier_AB.fit(X, y)
Classifier_AB.classifier  # NOTE(review): bare attribute access — no effect
y_hat = Classifier_AB.predict(X)
print(y_hat, y)
# [fig1, fig2] = Classifier_AB.plot()

print('Criteria :', criteria)
print('Accuracy: ', accuracy(y_hat, y))
for cls in y.unique():
    print('Precision: ', precision(y_hat, y, cls))
    print('Recall: ', recall(y_hat, y, cls))
# from linearRegression.linearRegression import LinearRegression

np.random.seed(42)

########### AdaBoostClassifier on Real Input and Discrete Output ###################
N = 30
P = 2
NUM_OP_CLASSES = 2
n_estimators = 5

X = pd.DataFrame(np.abs(np.random.randn(N, P)))
y = pd.Series(np.random.randint(NUM_OP_CLASSES, size=N), dtype="category")
y = y.cat.rename_categories([-1, 1])  # Changing 0 to -1 for adaboost
criteria = 'entropy'

# NOTE(review): this binds the DecisionTreeClassifier *class*, not an
# instance — presumably the project AdaBoostClassifier instantiates it.
tree = DecisionTreeClassifier
Classifier_AB = AdaBoostClassifier(base_estimator=tree, n_estimators=n_estimators)
Classifier_AB.fit(X, y)
y_hat = Classifier_AB.predict(X)
[fig1, fig2] = Classifier_AB.plot(X, y)

print('Criteria :', criteria)
print('Accuracy: ', accuracy(y_hat, y))
for cls in y.unique():
    print('Precision: ', precision(y_hat, y, cls))
    print('Recall: ', recall(y_hat, y, cls))

# AdaBoostClassifier on Iris data set using the entire data set with sepal width and petal width as the two features
split = 0.6
iris = pd.read_csv(os.path.join("data", "iris.csv"))
iris["variety"] = iris["variety"].astype("category")
shuffled = iris.sample(frac=1).reset_index(drop=True)