Example #1
0
import numpy as np
import sklearn.datasets

from eights.investigate import (
    cast_np_nd_to_sa,
    describe_cols,
)
from eights.communicate import (plot_correlation_scatter_plot,
                                plot_correlation_matrix, plot_kernel_density,
                                plot_box_plot)

# Load the iris data set a single time and take the feature matrix and the
# class labels from the same object (the original fetched it twice).
iris = sklearn.datasets.load_iris()
M = iris.data
labels = iris.target

# Convert the plain ndarray with the eights helper; the result is indexed by
# field name further down (M['f0']).
M = cast_np_nd_to_sa(M)

# The labels are multi-class; drop every row whose label is 2 so that only
# two classes remain.
keep_index = np.where(labels != 2)

labels = labels[keep_index]
M = M[keep_index]

# Disabled by default: flip to True to print the per-column description.
if False:
    for x in describe_cols(M):
        # Parenthesised single-argument form prints identically on Python 2
        # and is also valid Python 3.
        print(x)

# Disabled by default: flip to True to draw the exploratory plots.
if False:
    plot_correlation_scatter_plot(M)
    plot_correlation_matrix(M)
    plot_kernel_density(M['f0'])  # 'f0' is the field name used for the first column
Example #2
0
import numpy as np
import sklearn.datasets

from eights.investigate import cast_np_nd_to_sa, describe_cols
from eights.communicate import (
    plot_correlation_scatter_plot,
    plot_correlation_matrix,
    plot_kernel_density,
    plot_box_plot,
)

# Fetch the iris bunch once; both the features and the targets come from it,
# so there is no need to load the data set a second time.
iris_bunch = sklearn.datasets.load_iris()
M = cast_np_nd_to_sa(iris_bunch.data)
labels = iris_bunch.target


# The target has more than two classes; keep only the rows whose label is
# not 2.
keep_index = np.where(labels != 2)

labels = labels[keep_index]
M = M[keep_index]


# Optional diagnostics, switched off by default.
if False:
    for x in describe_cols(M):
        # Single-argument call form: same output on Python 2, valid on 3.
        print(x)

# Optional plot, switched off by default.
if False:
    plot_correlation_scatter_plot(M)
Example #3
0
                               plot_kernel_density,
                               plot_box_plot)

from eights.operate import run_std_classifiers 


# Download the white-wine quality CSV as a list of rows.  The first row is
# used as the header and the last entry of every row as the label.
data = open_csv_url_as_list(
    'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv',
    delimiter=';')

header, rows = data[0], data[1:]

col_names = header[:-1]
labels = np.array([int(row[-1]) for row in rows])

# Make this problem binary: 0 below the average label, 1 otherwise.  The
# average is computed once instead of once per element.
threshold = np.average(labels)
labels = np.where(labels < threshold, 0, 1)

# One float format per feature column; the label column is already excluded
# from col_names, so names and formats must have the same length.
dtype = np.dtype({'names': col_names,
                  'formats': [float] * len(col_names)})
M = cast_np_nd_to_sa(
    np.array([row[:-1] for row in rows], dtype='float'),
    dtype)

# Disabled by default: flip to True to print the per-column description.
if False:
    for x in describe_cols(M):
        # Parenthesised single-argument form prints identically on Python 2
        # and is also valid Python 3.
        print(x)

# Disabled by default: flip to True to draw the exploratory plots.
if False:
    # The fields are named after the CSV header here, so look the first one
    # up instead of hard-coding 'f0'.
    # NOTE(review): assumes M is a structured array -- confirm.
    first_col = M.dtype.names[0]
    plot_correlation_scatter_plot(M)
    plot_correlation_matrix(M)
    plot_kernel_density(M[first_col])
    plot_box_plot(M[first_col])