np.random.shuffle(index)
    len,_=df.values.shape
    train_size=int(len*0.6)
    train_data=df.loc[index][0:train_size]
    test_data=df.loc[index][train_size:]  
    return train_data,test_data


# --- AdaBoost on two Iris features (sepal_width, petal_width) ---------------
# Binary task: Iris-virginica (label 1) vs. the other two species (label 0).
# NOTE(review): `criteria` and `n_estimators` are referenced here but only
# assigned further down in this file — confirm they are defined before this
# point at runtime, otherwise this section raises NameError.
df = pd.read_csv("iris.data",
                 names=['sepal_length', 'sepal_width', 'petal_length',
                        'petal_width', 'label'])
# Keep only the two features used for classification, plus the target.
df = df[['sepal_width', 'petal_width', 'label']]
# Collapse the three species into a binary label: virginica vs. not-virginica.
df = df.replace({'label': {'Iris-setosa': 0, 'Iris-versicolor': 0,
                           'Iris-virginica': 1}})
train_data, test_data = train_test_split(df)
X = train_data[['sepal_width', 'petal_width']]
# Fix: select the label column directly as a Series — the original
# train_data[['label']]['label'] built a one-column DataFrame only to index
# it again; the result is the same Series.
y = train_data['label']

# Depth-1 trees (decision stumps) are the usual AdaBoost weak learner.
tree_iris = DecisionTree(criterion=criteria, max_depth=1)
Classifier_AB_iris = AdaBoostClassifier(base_estimator=tree_iris,
                                        n_estimators=n_estimators)
Classifier_AB_iris.fit(X, y)
y_hat = Classifier_AB_iris.predict(X)  # predictions on the training split
# [fig1, fig2] = Classifier_AB.plot()
print('Criteria :', criteria)
print('Accuracy: ', accuracy(y_hat, y))
print()
# Per-class precision/recall; class 1 is Iris-virginica.
for cls in y.unique():
    if cls == 1:
        print('Category: Iris-virginica')
    else:
        print('Category: Not Iris-virginica')
    print('Precision: ', precision(y_hat, y, cls))
    print('Recall: ', recall(y_hat, y, cls))
    print()
np.random.seed(42)

########### AdaBoostClassifier on Real Input and Discrete Output ###################


# Synthetic dataset: N samples with P positive real-valued features
# (absolute values of standard normals) and a random binary class label.
N = 30
P = 2
NUM_OP_CLASSES = 2
n_estimators = 3
X = pd.DataFrame(np.abs(np.random.randn(N, P)))
y = pd.Series(np.random.randint(NUM_OP_CLASSES, size = N), dtype="category")

criteria = 'information_gain'
# tree = DecisionTree(criterion=criteria)
# NOTE(review): base_estimator is the string 'tree' here, while every other
# call site in this file passes an estimator object or class — confirm the
# project's AdaBoostClassifier accepts a string, otherwise this is a bug
# (the commented-out line above suggests an instance was intended).
Classifier_AB = AdaBoostClassifier(base_estimator='tree', n_estimators=n_estimators )
Classifier_AB.fit(X, y)
y_hat = Classifier_AB.predict(X)
# [fig1, fig2] = Classifier_AB.plot()
print('Criteria :', criteria)
print('Accuracy: ', accuracy(y_hat, y))
# Per-class metrics on the training data.
for cls in y.unique():
    print('Precision for',cls,' : ', precision(y_hat, y, cls))
    print('Recall for ',cls ,': ', recall(y_hat, y, cls))



##### AdaBoostClassifier on Iris data set using the entire data set with sepal width and petal width as the two features
# NOTE(review): despite the header, all four iris features are loaded here —
# dataset.data is the full feature matrix, not just the two widths.
dataset = load_iris()
X, y = dataset.data, dataset.target 
X = pd.DataFrame(X)
# Evaluation below is disabled; Classifier_AB from the previous section was
# fit on different (2-feature synthetic) data, which is presumably why it
# was commented out.
# y_hat = Classifier_AB.predict(X)
# # [fig1, fig2] = Classifier_AB.plot()
# print('Criteria :', criteria)
# print('Accuracy: ', accuracy(y_hat, y))
# for cls in np.unique(y):
#     print('Precision of {} is: '.format(cls), precision(y_hat, y, cls))
#     print('Recall of {} is: '.format(cls), recall(y_hat, y, cls))

# del(Classifier_AB)

##### AdaBoostClassifier on Iris data set using the entire data set with sepal width and petal width as the two features
# Binary encoding of the target: virginica -> +1, any other species -> -1.
da = pd.read_csv('iris.csv')
label = np.array(da["species"])
label = np.where(label == "virginica", 1, -1)
# Index-aligned two-column feature frame with the signed target attached
# as a "Truth" column.
col1 = da["sepal_width"]
col2 = da["petal_width"]
iris = pd.merge(col1, col2, left_index=True, right_index=True)
iris["Truth"] = label
# Shuffle the rows once, then slice: first 60% for training, rest for test.
iris = iris.sample(frac=1).reset_index(drop=True)
split_at = int(0.6 * (iris.shape[0]))
X_train, X_test = iris.iloc[:split_at, :-1], iris.iloc[split_at:, :-1]
y_train, y_test = iris.iloc[:split_at, -1], iris.iloc[split_at:, -1]
# Fit a 3-estimator AdaBoost ensemble and report held-out accuracy.
Classifier_AB1 = AdaBoostClassifier(n_estimators=3)
Classifier_AB1.fit(X_train, y_train)
y_hat = Classifier_AB1.predict(X_test)
print(list(y_hat), list(y_test))
print("Accuracy: ", accuracy(y_hat, y_test))
Classifier_AB1.plot(X_test)
# NOTE(review): mid-file import — PEP 8 wants imports at the top of the
# file, and `csv` is not used anywhere in the visible code; confirm it is
# actually needed.
import csv

np.random.seed(42)

########### AdaBoostClassifier on Real Input and Discrete Output ###################

# Second copy of the synthetic experiment: 30 samples, 2 positive real
# features, random binary class label.
N = 30
P = 2
NUM_OP_CLASSES = 2
n_estimators = 3
X = pd.DataFrame(np.abs(np.random.randn(N, P)))
y = pd.Series(np.random.randint(NUM_OP_CLASSES, size=N), dtype="category")

criteria = 'information_gain'
# Weak learner: a depth-1 sklearn decision tree (a stump).
# NOTE(review): `tree` here must be sklearn's `tree` module; code later in
# this file rebinds the name `tree` to a class — confirm imports and
# execution order at runtime.
Dtree = tree.DecisionTreeClassifier(criterion='entropy', max_depth=1)
Classifier_AB = AdaBoostClassifier(base_estimator=Dtree,
                                   n_estimators=n_estimators)
Classifier_AB.fit(X, y)
y_hat = Classifier_AB.predict(X)
[fig1, fig2] = Classifier_AB.plot()
# NOTE(review): the printed `criteria` ('information_gain') differs from the
# criterion actually given to the stump ('entropy') — likely cosmetic only,
# but worth confirming.
print('Criteria :', criteria)
print('Accuracy: ', accuracy(y_hat, y))
for cls in y.unique():
    print('Precision: ', precision(y_hat, y, cls))
    print('Recall: ', recall(y_hat, y, cls))

##### AdaBoostClassifier on Iris data set using the entire data set with sepal width and petal width as the two features


def load_iris():
    # NOTE(review): stub — builds two empty lists and implicitly returns
    # None. It also shadows whatever `load_iris` the earlier call in this
    # file relied on (that caller expects an object with .data/.target).
    # The body looks truncated; confirm against the original source.
    X = []
    y = []
# from linearRegression.linearRegression import LinearRegression

np.random.seed(42)

########### AdaBoostClassifier on Real Input and Discrete Output ###################
# Third copy of the synthetic experiment: 5 estimators this time, and the
# binary labels remapped from {0, 1} to {-1, +1}.
N = 30
P = 2
NUM_OP_CLASSES = 2
n_estimators = 5
X = pd.DataFrame(np.abs(np.random.randn(N, P)))
y = pd.Series(np.random.randint(NUM_OP_CLASSES, size=N), dtype="category")
y = y.cat.rename_categories([-1, 1])  # Changing 0 to -1 for adaboost

criteria = 'entropy'
# NOTE(review): the CLASS DecisionTreeClassifier is passed here (not an
# instance, unlike the section above), and this rebinds the module-level
# name `tree` — confirm AdaBoostClassifier accepts a class.
tree = DecisionTreeClassifier
Classifier_AB = AdaBoostClassifier(base_estimator=tree,
                                   n_estimators=n_estimators)
Classifier_AB.fit(X, y)
y_hat = Classifier_AB.predict(X)
[fig1, fig2] = Classifier_AB.plot(X, y)
print('Criteria :', criteria)
print('Accuracy: ', accuracy(y_hat, y))
for cls in y.unique():
    print('Precision: ', precision(y_hat, y, cls))
    print('Recall: ', recall(y_hat, y, cls))

# AdaBoostClassifier on Iris data set using the entire data set with sepal width and petal width as the two features
# Train fraction for the split performed below / in code past this chunk.
split = 0.6

iris = pd.read_csv(os.path.join("data", "iris.csv"))
# Treat the species column as categorical before shuffling.
iris["variety"] = iris["variety"].astype("category")
shuffled = iris.sample(frac=1).reset_index(drop=True)