def main(c="decision_tree", option="IG", dataset="iris", ratio=0.8):
    """Train one classifier on one dataset and report its test results.

    Args:
        c: Classifier type, forwarded to ``Classifier`` as
            ``classifier_type``. The types this script was written for are
            "decision_tree", "naive_bayes" and "neural_net".
        option: Classifier-specific option, forwarded unchanged. The values
            this script was written for are "IG"/"IGR" (decision tree),
            "normal" (naive Bayes) and "shallow"/"medium" (neural net;
            3 and 5 layers respectively).
        dataset: One of "monks", "congress" or "iris".
        ratio: Converted to ``float`` and handed to the dataset loader;
            presumably the training fraction -- confirm against load_data.

    Returns:
        None. An unknown ``dataset`` prints an error message and returns
        without training anything.

    Raises:
        ValueError: If ``ratio`` cannot be converted to ``float``.
    """
    ratio = float(ratio)

    # NOTE(review): load_monks receives the float ratio like the other
    # loaders -- confirm it does not expect a problem number instead.
    if dataset == "monks":
        training, test = load_data.load_monks(ratio)
    elif dataset == "congress":
        training, test = load_data.load_congress_data(ratio)
    elif dataset == "iris":
        training, test = load_data.load_iris(ratio)
    else:
        print("Error: Cannot find dataset name.")
        return

    print("Training... Please hold.")

    # Single-argument print(...) with %-formatting yields the same output on
    # Python 2 and Python 3 (the double space mirrors ``print "x = ", y``).
    separator = "================================================================="
    print(" ")
    print(separator)
    print("Dataset =  %s" % (dataset,))
    print("Classifier =  %s" % (c,))
    print("Option =  %s" % (option,))

    classifier = Classifier(classifier_type=c, weights=[], option=option)
    classifier.train(training)
    classifier.test(test)

    print(separator)
    print(" ")
def trainNtest(args):
    """Build, train and evaluate a classifier described by four string args.

    Args:
        args: Sequence of exactly four strings.
            args[0]: dataset selector; the text after its first three
                characters must be "congress", "monk" or "iris".
            args[1]: for congress/iris, the text after the first three
                characters is an integer percentage (divided by 100.0
                before being passed to the loader); for monk the whole
                string is parsed as an int and passed to ``ld.load_monks``.
            args[2]: classifier selector; character 3 is an index into
                ("decision_tree", "naive_bayes", "neural_network"). For
                indices 0 and 2, characters 5 and 7 are forwarded to
                ``Classifier`` as ``one`` and ``two``.
            args[3]: "-test" evaluates on ``data[1]``; any other value
                evaluates on ``data[0]`` (the data that was trained on).

    Returns:
        None. A wrong argument count or an unknown dataset name prints an
        error message and returns without training.
    """
    classifier_types = ["decision_tree", "naive_bayes", "neural_network"]

    if len(args) != 4:
        print("ERROR: NEED 4 PARAMETERS")
        return

    dataset_name = args[0][3:]
    if dataset_name == "congress":
        data = ld.load_congress_data(int(args[1][3:]) / 100.0)
        num_input, num_output = 16, 2
    elif dataset_name == "monk":
        # NOTE(review): unlike the other branches this parses all of args[1]
        # rather than args[1][3:] -- confirm the expected argument format.
        data = ld.load_monks(int(args[1]))
        num_input, num_output = 6, 2
    elif dataset_name == "iris":
        data = ld.load_iris(int(args[1][3:]) / 100.0)
        num_input, num_output = 4, 3
    else:
        print("INVALID DATA NAME")
        return

    method_num = int(args[2][3])
    if method_num in (0, 2):
        # Decision tree and neural network take two extra one-character
        # options plus the input/output sizes of the chosen dataset.
        classifier = Classifier(
            classifier_types[method_num],
            one=args[2][5],
            two=args[2][7],
            num_input=num_input,
            num_output=num_output,
        )
    else:
        classifier = Classifier(classifier_types[method_num])

    classifier.train(data[0])
    if args[3] == "-test":
        classifier.test(data[1])
    else:
        classifier.test(data[0])
def run(self):
    """Load the selected dataset and analyze it with the selected algorithm.

    The dataset name is read from ``self.data_selection`` and the algorithm
    name from ``self.alg_selection``. For a recognised algorithm this calls
    ``Analyze.analyze(data, dataset_name, 10, clustering_func, scores)``
    where ``clustering_func`` is built from the per-dataset parameter table
    of that algorithm and ``scores`` is ``[score_funcs.cluster_sse]``.

    An unrecognised algorithm name does nothing. An unrecognised dataset
    name leaves ``data`` unbound, so a recognised algorithm then fails when
    ``data`` is first used.
    """
    # First the selected dataset needs to be loaded.
    chosen_dataset = self.data_selection.get()
    loader_names = {
        "Iris": "load_iris",
        "Seeds": "load_seeds",
        "Glass": "load_glass",
        "Banknote": "load_banknote",
        "Customers": "load_cust_data",
    }
    if chosen_dataset in loader_names:
        if chosen_dataset == "Iris":
            print("Selecting Iris!")
        # Resolve the loader lazily so only the chosen one is looked up.
        data = getattr(load_data, loader_names[chosen_dataset])()

    # Now run the selected clustering algorithm.
    scorers = [score_funcs.cluster_sse]
    builders = {
        "K-Means": lambda: self.build_kMeans_func(
            *kMeans_params[chosen_dataset]),
        "DBSCAN": lambda: self.build_dbscan_func(
            *dbscan_params[chosen_dataset]),
        "Competitive Learning": lambda: self.build_cl_func(
            *cl_params[chosen_dataset]),
        "PSO": lambda: self.build_pso_function(
            *pso_params[chosen_dataset]),
        "ACO": lambda: self.build_aco_func(
            *aco_params[chosen_dataset]),
    }
    make_clusterer = builders.get(self.alg_selection.get())
    if make_clusterer is not None:
        # The builder runs inside the argument list, after ``data`` has been
        # evaluated, exactly as in a direct call.
        Analyze.analyze(data, chosen_dataset, 10, make_clusterer(), scorers)
# Upload widget for input images / video; the uploaded file could later be
# loaded into a specific dataframe.
file = st.file_uploader('File uploader')

# Let the user pick a color (the chosen value is not stored).
st.color_picker('Pick a color')

# Headings shown at the top of the page.
st.title("My Awesome Flower Predictor")
st.header("We predict Iris types")
st.subheader("No joke")

# Goal: get input from the user about an Iris and predict its type/species.
# Load the data first.
df_iris = load_data.load_iris()

# Plot sepal width against sepal length on the page.
st.plotly_chart(px.scatter(df_iris, x='sepal_width', y='sepal_length'))

# Only show the table if the user asks for it; the checkbox result is saved
# as a boolean.
show_df = st.checkbox("Do you want to see the data?")
if show_df:
    # A bare expression on its own line is rendered by Streamlit.
    df_iris

# Make it interactive -- get user input so we can make a prediction.
# The questions must be listed manually in the SAME ORDER as the df columns.
X_ = X_ - self.mu.reshape((1, K, d)) # (n, K, d) X_ = X_.reshape((n, K, d, 1)) * P_ X__ = X_.reshape((n, K, 1, d)) outer = X_ @ X__ # (n, K, d, d) Si_hat = outer.sum(axis=0) / Nks # (K, d, d) # Pi pi_hat = Nks.squeeze() / n return mu_hat, Si_hat, pi_hat if __name__ == "__main__": # Iris X, _ = load_iris() K = 3 gmm = MyGMM(K) gmm.mu, gmm.Sigma, gmm.pi = gmm.initialize_parameters(X) y1 = gmm.predict(X) gmm.fit(X, max_iter=100) y2 = gmm.predict(X) fig, ax = plt.subplots(2, 2) ax[0, 0].scatter(X[:,0], X[:,1], c=y1) ax[0, 1].scatter(X[:,0], X[:,1], c=y2)
""" Principal Components Analysis (PCA) using NumPy Dataset: Iris dataset. Adapted from Plotly PCA tutorial https://plot.ly/ipython-notebooks/principal-component-analysis/ """ from load_data import load_iris import numpy as np X_std, y = load_iris(std=True) def pca(X_std): # 1. Calculate covariance matrix mean_vec = np.mean(X_std, axis=0) # same as np.cov(X_std.T) N = X_std.shape[0] cov_mat = (X_std - mean_vec).T.dot((X_std - mean_vec)) / (N - 1) # 2. Find eigenvectors and eigenvalues by SVD u, s, v = np.linalg.svd(X_std.T) eig_vals = s**2 / (N - 1) eig_vecs = u # Can also do by eigendecomposition -> less efficient, O(N^3) # vs O(min(M,N)MN). # can also do for cor_mat1 = np.corrcoef(X_std.T)
import pickle

import numpy as np
import plotly.express as px
import streamlit as st

from load_data import load_iris

# Page headings.
st.title("My Growing Garden")
st.header("We plant plants")
st.subheader("orchids and tulips")

# Load the data.
df_iris = load_iris()

# Show the raw table only when the user ticks the checkbox.
show_df = st.checkbox("Do you want to see the plant data?")
if show_df:
    # A bare expression on its own line is rendered by Streamlit.
    df_iris

# NOTE(review): the chart is assumed to be unconditional (outside the
# checkbox branch) -- confirm against the intended page layout.
st.plotly_chart(px.scatter(df_iris, x='sepal_width', y='sepal_length'))

# Collect the four flower measurements from the user.
s_l = st.number_input('Input the sepal length')
s_w = st.number_input('Input the sepal width')
p_l = st.number_input('Input the petal length')
p_w = st.number_input('Input the petal width')

# Pack the inputs into one array: sepal length, sepal width, petal length,
# petal width.
user_values = np.array([s_l, s_w, p_l, p_w])

# Load model.
""" K-means implementation Seems okay: Have not tested this rigorously, but this separates 'Iris-setosa' pretty well from 'Iris-versicolor' and 'Iris-virginica', but mixes the latter two. Dataset: Iris dataset. """ from load_data import load_iris import numpy as np X_std, y = load_iris(std=True) def k_means(X, num_means=3, num_iterations=10): """K means. Assumes each datapoint is a 1D array.""" # data dim N, D = X.shape # initialise vars assignments = np.zeros(N) dists = np.zeros((N, num_means)) # 1. Init means means = np.random.random((num_means, D)) # 2. Iterate for i in range(num_iterations):
from load_data import load_congress_data
from load_data import load_monks
from nn import NN
from nb import NB
from nb import GNB
from tree import DT
import random
import math
import numpy.matlib

# Number of classes in each data set.
ncIris = 3
ncCongress = 2
ncMonks = 2


def _load_split(loader, arg):
    """Call ``loader(arg)`` once; return elements 0 and 1 as numpy matrices."""
    split = loader(arg)
    return np.matrix(split[0]), np.matrix(split[1])


# Dataset reading. Each loader is called exactly once per dataset so that the
# train and test matrices come from the same call. Calling the loader twice
# repeats the work and, if the loader shuffles before splitting (TODO
# confirm), would pair a train set and a test set from different splits.
trainSetIris, testSetIris = _load_split(load_iris, 0.7)
trainSetCongress, testSetCongress = _load_split(load_congress_data, 0.7)
trainM1, testM1 = _load_split(load_monks, 1)
trainM2, testM2 = _load_split(load_monks, 2)
trainM3, testM3 = _load_split(load_monks, 3)

# Decision Tree on Congress using IG.
# Single-argument print(...) gives the same output on Python 2 and Python 3.
print("1. DT, Congress, IG--------------------------------")
model0 = DT()
model0.train(trainSetCongress, ncCongress, 1)
model0.test(testSetCongress, ncCongress, 1)
# The classifier name is the first command-line argument. The dataset name
# is read from position 3 for "decision_tree" and from position 2 otherwise.
classifier_type = sys.argv[1]
data = sys.argv[3] if classifier_type == "decision_tree" else sys.argv[2]

params = {}
train_data = test_data = None

# Load the requested dataset; an unrecognised name leaves both sets as None.
if data == "congress":
    train_data, test_data = load_data.load_congress_data(0.7)
elif data == "iris":
    train_data, test_data = load_data.load_iris(0.7)
elif data == "monk":
    # The monk number follows the dataset name, so it sits one position
    # later for "decision_tree".
    i = 4 if classifier_type == "decision_tree" else 3
    numb = sys.argv[i]
    train_data, test_data = load_data.load_monks(int(numb))

# For the neural network an optional trailing argument selects the
# activation; it comes after the monk number when the dataset is "monk".
if classifier_type == "neural_network":
    i = 4 if data == "monk" else 3
    if len(sys.argv) > i:
        params["activation"] = sys.argv[i]