예제 #1
0
def main(c = "decision_tree", option = "IG", dataset = "iris", ratio = 0.8):
	"""Train one classifier on one dataset and print its test report.

	Combinations listed by the original author: "decision_tree" with
	"IG" or "IGR"; "naive_bayes" with "normal"; "neural_net" with
	"shallow" (3 layers) or "medium" (5). Nothing here enforces them;
	``c`` and ``option`` are forwarded to ``Classifier`` unchanged.

	Args:
		c: Classifier type, passed to ``Classifier`` as ``classifier_type``.
		option: Classifier-specific option, passed to ``Classifier``.
		dataset: One of "monks", "congress" or "iris".
		ratio: Value handed to the dataset loader after conversion with
			``float()`` (presumably the training fraction -- confirm
			against ``load_data``).

	Returns:
		None. An unknown ``dataset`` prints an error message and returns
		before anything is loaded or trained.

	Raises:
		ValueError, TypeError: If ``ratio`` cannot be converted by ``float()``.
	"""
	ratio = float(ratio)

	if dataset == "monks":
		(training, test) = load_data.load_monks(ratio)
	elif dataset == "congress":
		(training, test) = load_data.load_congress_data(ratio)
	elif dataset == "iris":
		(training, test) = load_data.load_iris(ratio)
	else:
		print("Error: Cannot find dataset name.")
		return

	print("Training... Please hold.")

	# Every print below takes a single pre-formatted string, so the output
	# is the same under the Python 2 print statement and Python 3 print().
	blank = "                                                                 "
	rule = "================================================================="

	print(blank)
	print(rule)
	print("Dataset    =  %s" % (dataset,))
	print("Classifier =  %s" % (c,))
	print("Option     =  %s" % (option,))
	classifier = Classifier(classifier_type=c, weights=[], option=option)
	classifier.train(training)
	classifier.test(test)
	print(rule)
	print(blank)
예제 #2
0
def trainNtest(args):
    """Train a classifier on the requested data set, then evaluate it.

    ``args`` must contain exactly four strings. Where noted, the first
    three characters of a field are skipped as an option prefix
    (presumably something like "-d=" -- confirm against the caller):

    * ``args[0]``: prefix + data set name ("congress", "monk" or "iris").
    * ``args[1]``: for congress/iris, prefix + integer percentage, divided
      by 100 before being passed to the loader; for monk, an integer passed
      straight to ``ld.load_monks``.
    * ``args[2]``: the character at index 3 is the classifier index
      (0 decision_tree, 1 naive_bayes, 2 neural_network). For 0 and 2 the
      characters at indexes 5 and 7 are forwarded as ``one`` and ``two``.
    * ``args[3]``: "-test" evaluates on ``data[1]``; anything else
      evaluates on ``data[0]``, the data used for training.

    Returns:
        None. A wrong argument count or an unknown data set name prints an
        error message and returns before anything is loaded.
    """
    classifierType = ["decision_tree", "naive_bayes", "neural_network"]

    if len(args) != 4:
        print("ERROR: NEED 4 PARAMETERS")
        return

    data_name = args[0][3:]
    if data_name == "congress":
        data = ld.load_congress_data(int(args[1][3:]) / 100.0)
        num_input, num_output = 16, 2
    elif data_name == "monk":
        # NOTE(review): unlike the other branches, args[1] is parsed without
        # stripping a three-character prefix -- confirm this is intended.
        data = ld.load_monks(int(args[1]))
        num_input, num_output = 6, 2
    elif data_name == "iris":
        data = ld.load_iris(int(args[1][3:]) / 100.0)
        num_input, num_output = 4, 3
    else:
        print("INVALID DATA NAME")
        return

    method_num = int(args[2][3])
    if method_num in (0, 2):
        # Decision tree and neural network take two one-character options
        # plus the input/output sizes of the chosen data set.
        classifier = Classifier(classifierType[method_num],
                                one=args[2][5], two=args[2][7],
                                num_input=num_input, num_output=num_output)
    else:
        classifier = Classifier(classifierType[method_num])

    classifier.train(data[0])

    if args[3] == "-test":
        classifier.test(data[1])
    else:
        classifier.test(data[0])
예제 #3
0
    def run(self):
        """Load the selected dataset and analyze it with the selected algorithm.

        The dataset name is read from ``self.data_selection.get()`` and the
        algorithm name from ``self.alg_selection.get()``. The clustering
        function is built from the per-dataset parameter table of the chosen
        algorithm and handed to ``Analyze.analyze`` together with the data
        and the cluster-SSE score function.

        If the algorithm name is not recognised, nothing is analyzed.

        Raises:
            ValueError: If the algorithm is recognised but the dataset name
                is not one of the known datasets.
            KeyError: If the chosen algorithm has no parameters registered
                for the dataset.
        """
        # Attribute of ``load_data`` that loads each selectable dataset.
        # Looked up lazily so only the chosen loader is ever touched.
        loader_names = {
            "Iris": "load_iris",
            "Seeds": "load_seeds",
            "Glass": "load_glass",
            "Banknote": "load_banknote",
            "Customers": "load_cust_data",
        }

        # First the selected dataset needs to be loaded
        dataset_name = self.data_selection.get()
        dataset_known = dataset_name in loader_names
        if dataset_known:
            if dataset_name == "Iris":
                print("Selecting Iris!")
            data = getattr(load_data, loader_names[dataset_name])()

        # Now pick the selected clustering algorithm
        score_list = [score_funcs.cluster_sse]
        algorithm = self.alg_selection.get()
        if algorithm == "K-Means":
            build_func, params = self.build_kMeans_func, kMeans_params
        elif algorithm == "DBSCAN":
            build_func, params = self.build_dbscan_func, dbscan_params
        elif algorithm == "Competitive Learning":
            build_func, params = self.build_cl_func, cl_params
        elif algorithm == "PSO":
            build_func, params = self.build_pso_function, pso_params
        elif algorithm == "ACO":
            build_func, params = self.build_aco_func, aco_params
        else:
            # Unrecognised algorithm: nothing to run.
            return

        if not dataset_known:
            # Without this check ``data`` would be unbound below and the
            # failure would surface as an opaque UnboundLocalError.
            raise ValueError("Unknown dataset: %r" % (dataset_name,))

        # The third argument (10) is passed through unchanged; its meaning
        # is defined by Analyze.analyze.
        Analyze.analyze(data, dataset_name, 10,
                        build_func(*params[dataset_name]),
                        score_list)
예제 #4
0
# Streamlit page script: widgets are rendered in the order the statements run.
# NOTE(review): `st`, `px` and `load_data` are imported outside this section.

# File-upload widget (e.g. for images or video). The returned value is kept
# in `file` but is not used anywhere in the visible part of the script.
file = st.file_uploader('File uploader')

# Colour-picker widget; the chosen colour is not stored.
st.color_picker('Pick a color')

# Page title and sub-headings shown at the top of the app.
st.title("My Awesome Flower Predictor")
st.header("We predict Iris types")
st.subheader("No joke")

# Goal of the page: collect Iris measurements from the user and predict the
# species.

# Load the Iris data (presumably a pandas DataFrame -- confirm in load_data).
df_iris = load_data.load_iris()

# Scatter plot with sepal_width on the x axis and sepal_length on the y axis.
st.plotly_chart(px.scatter(df_iris, 'sepal_width', 'sepal_length'))

# Showing the raw data is optional.

# Checkbox prompt; the result is a boolean.
show_df = st.checkbox("Do you want to see the data?")

if show_df:  # only when the box is ticked
    # A bare expression: this relies on Streamlit's "magic" feature to render
    # the value on the page. It is not a no-op, so do not remove it.
    df_iris

# Next step: make the page interactive by asking for the user's measurements
# so a prediction can be made. The questions must be listed manually, in the
# SAME ORDER as the columns of the data table.
예제 #5
0
		X_		= X_ - self.mu.reshape((1, K, d))		# (n, K, d)

		X_		= X_.reshape((n, K, d, 1)) * P_
		X__		= X_.reshape((n, K, 1, d))

		outer	= X_ @ X__								# (n, K, d, d)
		Si_hat	= outer.sum(axis=0) / Nks				# (K, d, d)

		# Pi
		pi_hat	= Nks.squeeze() / n
		return  mu_hat, Si_hat, pi_hat

if __name__ == "__main__":
	# Demo: run MyGMM on the Iris data and plot the predicted labels before
	# and after fitting.

	# Iris -- only the first value returned by load_iris() is used.
	X, _	= load_iris()
	K		= 3
	gmm		= MyGMM(K)

	# Set the starting parameters explicitly so that predict() can be called
	# before fit().
	gmm.mu, gmm.Sigma, gmm.pi = gmm.initialize_parameters(X)
	
	# y1: predictions under the initial parameters; y2: predictions after fit().
	y1 = gmm.predict(X) 
	gmm.fit(X, max_iter=100)
	y2 = gmm.predict(X) 

	# 2x2 grid of axes; the visible code fills only the top row.
	fig, ax = plt.subplots(2, 2)

	# First two columns of X, coloured by predicted label
	# (left: before fitting, right: after fitting).
	ax[0, 0].scatter(X[:,0], X[:,1], c=y1)
	ax[0, 1].scatter(X[:,0], X[:,1], c=y2)

예제 #6
0
"""
Principal Components Analysis (PCA) using NumPy

Dataset: Iris dataset.
Adapted from Plotly PCA tutorial
https://plot.ly/ipython-notebooks/principal-component-analysis/
"""

from load_data import load_iris
import numpy as np

# Load the Iris features and labels at import time. `std=True` presumably
# requests standardised features -- TODO confirm against load_data.load_iris.
X_std, y = load_iris(std=True)


def pca(X_std):
    """Compute eigenvalues and eigenvectors for PCA of ``X_std`` via SVD.

    ``X_std`` is indexed as a 2-D array with one sample per row
    (``shape[0]`` is used as the sample count ``N``).

    NOTE(review): no ``return`` is visible in this section; the function
    appears to continue beyond it.
    """

    # 1. Calculate covariance matrix
    mean_vec = np.mean(X_std, axis=0)
    # same as np.cov(X_std.T)
    N = X_std.shape[0]
    # NOTE(review): cov_mat is not used in the visible part of the function.
    cov_mat = (X_std - mean_vec).T.dot((X_std - mean_vec)) / (N - 1)

    # 2. Find eigenvectors and eigenvalues by SVD
    # NOTE(review): the SVD is taken of X_std.T without subtracting mean_vec,
    # so s**2 / (N - 1) equals the covariance eigenvalues only when the
    # columns of X_std already have zero mean -- presumably guaranteed by the
    # caller's standardisation; confirm.
    u, s, v = np.linalg.svd(X_std.T)

    eig_vals = s**2 / (N - 1)
    eig_vecs = u

    # Can also do by eigendecomposition -> less efficient, O(N^3)
    # vs O(min(M,N)MN).
    # can also do for cor_mat1 = np.corrcoef(X_std.T)
예제 #7
0
import streamlit as st
import plotly.express as px
import numpy as np
import pickle
from load_data import load_iris

# Streamlit page script: widgets are rendered in the order the statements run.

# Page title and sub-headings.
st.title("My Growing Garden")
st.header("We plant plants")
st.subheader("orchids and tulips")

# Load the Iris data (presumably a pandas DataFrame -- confirm in load_data).

df_iris = load_iris()

# Checkbox prompt; the result is a boolean.
show_df = st.checkbox("Do you want to see the plant data?")

if show_df:
    # A bare expression: this relies on Streamlit's "magic" feature to render
    # the value on the page. It is not a no-op, so do not remove it.
    df_iris

# Scatter plot with sepal_width on the x axis and sepal_length on the y axis.
st.plotly_chart(px.scatter(df_iris, 'sepal_width', 'sepal_length'))

# Ask the user for the four flower measurements.
s_l = st.number_input('Input the sepal length')
s_w = st.number_input('Input the sepal width')
p_l = st.number_input('Input the petal length')
p_w = st.number_input('Input the petal width')

# Pack the inputs as [sepal length, sepal width, petal length, petal width].
# NOTE(review): this order presumably has to match the feature order the
# model was trained on -- verify before predicting.
user_values = np.array([s_l, s_w, p_l, p_w])

# Next step: load the model.
예제 #8
0
"""
K-means implementation

Seems okay: Have not tested this rigorously, but this separates 'Iris-setosa' 
pretty well from 'Iris-versicolor' and 'Iris-virginica',
but mixes the latter two. 

Dataset: Iris dataset.
"""

from load_data import load_iris
import numpy as np


# Load the Iris features and labels at import time. `std=True` presumably
# requests standardised features -- TODO confirm against load_data.load_iris.
X_std, y = load_iris(std=True)


def k_means(X, num_means=3, num_iterations=10):
    """K means. Assumes each datapoint is a 1D array."""
    # data dim
    N, D = X.shape
    
    # initialise vars
    assignments = np.zeros(N)
    dists = np.zeros((N, num_means))    

    # 1. Init means
    means = np.random.random((num_means, D))
    
    # 2. Iterate
    for i in range(num_iterations):
예제 #9
0
from load_data import load_congress_data
from load_data import load_monks
from nn import NN
from nb import NB
from nb import GNB
from tree import DT
import random
import math
import numpy.matlib

# number of class of each data set
ncIris = 3
ncCongress = 2
ncMonks = 2
# Dataset reading
trainSetIris = np.matrix(load_iris(0.7)[0])
testSetIris = np.matrix(load_iris(0.7)[1])
trainSetCongress = np.matrix(load_congress_data(0.7)[0])
testSetCongress = np.matrix(load_congress_data(0.7)[1])
trainM1 = np.matrix(load_monks(1)[0])
testM1 = np.matrix(load_monks(1)[1])
trainM2 = np.matrix(load_monks(2)[0])
testM2 = np.matrix(load_monks(2)[1])
trainM3 = np.matrix(load_monks(3)[0])
testM3 = np.matrix(load_monks(3)[1])

# Decision Tree on Congress using IG
print "1. DT, Congress, IG--------------------------------"
model0 = DT()
model0.train(trainSetCongress, ncCongress, 1)
model0.test(testSetCongress, ncCongress, 1)
예제 #10
0
# Command-line layout, as read below:
#   argv[1]   classifier type
#   argv[2]   data set name, or argv[3] when the classifier is
#             "decision_tree" (its argv[2] is not read in this section)
#   next      problem number, only when the data set is "monk"
#   next      optional activation, only for "neural_network"
classifier_type = sys.argv[1]

# "decision_tree" shifts every later argument one slot to the right.
if classifier_type == "decision_tree":
    data = sys.argv[3]
else:
    data = sys.argv[2]

params = dict()

# Both stay None when the data set name is not recognised.
(train_data, test_data) = (None, None)

if data == "congress":
    (train_data, test_data) = load_data.load_congress_data(0.7)
elif data == "iris":
    (train_data, test_data) = load_data.load_iris(0.7)
elif data == "monk":
    # The problem number directly follows the data set name.
    i = 4 if classifier_type == "decision_tree" else 3
    numb = sys.argv[i]
    (train_data, test_data) = load_data.load_monks(int(numb))

if classifier_type == "neural_network":
    # The activation follows the data set name, or the problem number when
    # the data set is "monk". It is optional, so check the length first.
    i = 4 if data == "monk" else 3
    if len(sys.argv) > i:
        params["activation"] = sys.argv[i]