Example #1
0
 def test_open_csv_url(self):
     """Check the dtype that ``read.open_csv_url`` reports for a remote CSV.

     Reads the semicolon-delimited white-wine-quality file from the UCI
     archive and compares the dtype of the result with a hand-written
     control dtype.  When the URL cannot be opened, a notice is printed
     and ``self.skipTest`` is called instead of running the comparison.
     """
     url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv'
     # Probe the URL first so that an unreachable resource skips the test.
     try:
         probe = urllib2.urlopen(url)
     except (urllib2.HTTPError, urllib2.URLError):
         utils_for_tests.print_in_box('skipping test_open_csv_url',
                                      'remote resource not found')
         self.skipTest('couldn\'t get remote resource')
     else:
         # Close the probe response explicitly rather than leaving the
         # open connection for the garbage collector to reclaim.
         probe.close()
     sa = read.open_csv_url(url, delimiter=';')
     ctrl_dtype = [('fixed acidity', '<f8'), ('volatile acidity', '<f8'),
                   ('citric acid', '<f8'), ('residual sugar', '<f8'),
                   ('chlorides', '<f8'), ('free sulfur dioxide', '<f8'),
                   ('total sulfur dioxide', '<f8'), ('density', '<f8'),
                   ('pH', '<f8'), ('sulphates', '<f8'), ('alcohol', '<f8'),
                   ('quality', '<i8')]
     self.assertEqual(sa.dtype, ctrl_dtype)
Example #2
0
 def test_open_csv_url(self):
     """Verify the dtype ``read.open_csv_url`` reports for the wine CSV.

     Reads the semicolon-delimited white-wine-quality file from the UCI
     archive and asserts that the result's dtype equals the control
     dtype: eleven '<f8' columns followed by an '<i8' 'quality' column.
     """
     source = ('http://archive.ics.uci.edu/ml/machine-learning-databases/'
               'wine-quality/winequality-white.csv')
     loaded = read.open_csv_url(source, delimiter=';')
     # Every column except the last is expected to be a little-endian
     # 8-byte float; 'quality' is expected to be an 8-byte integer.
     float_columns = ('fixed acidity', 'volatile acidity', 'citric acid',
                      'residual sugar', 'chlorides', 'free sulfur dioxide',
                      'total sulfur dioxide', 'density', 'pH', 'sulphates',
                      'alcohol')
     expected = [(column, '<f8') for column in float_columns]
     expected.append(('quality', '<i8'))
     self.assertEqual(loaded.dtype, expected)
Example #3
0
 def test_open_csv_url(self):
     """Check the dtype that ``read.open_csv_url`` reports for a remote CSV.

     Reads the semicolon-delimited white-wine-quality file from the UCI
     archive and compares the dtype of the result with a hand-written
     control dtype.  When the URL cannot be opened, a notice is printed
     and ``self.skipTest`` is called instead of running the comparison.
     """
     url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv'
     # Probe the URL first so that an unreachable resource skips the test.
     try:
         response = urllib2.urlopen(url)
     except (urllib2.HTTPError, urllib2.URLError):
         utils_for_tests.print_in_box('skipping test_open_csv_url', 'remote resource not found')
         self.skipTest('couldn\'t get remote resource')
     else:
         # The probe is only a reachability check; release its connection
         # right away instead of leaving the response open.
         response.close()
     sa = read.open_csv_url(url, delimiter=';')
     ctrl_dtype = [('fixed acidity', '<f8'),
                   ('volatile acidity', '<f8'),
                   ('citric acid', '<f8'),
                   ('residual sugar', '<f8'),
                   ('chlorides', '<f8'),
                   ('free sulfur dioxide', '<f8'),
                   ('total sulfur dioxide', '<f8'),
                   ('density', '<f8'),
                   ('pH', '<f8'),
                   ('sulphates', '<f8'),
                   ('alcohol', '<f8'),
                   ('quality', '<i8')]
     self.assertEqual(sa.dtype, ctrl_dtype)
Example #4
0
import numpy as np
import sklearn.datasets

from diogenes.read import open_csv_url
from diogenes.display import (plot_correlation_scatter_plot,
                               plot_correlation_matrix, 
                               plot_kernel_density,
                               plot_box_plot)

from diogenes.grid_search import Experiment 
from diogenes.grid_search import std_clfs as std_clfs
from diogenes.utils import remove_cols


# Load the white-wine-quality data set (semicolon-delimited CSV) from the
# UCI archive.
data = open_csv_url(
    'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv',
    delimiter=';')

# Split the 'quality' column off from the remaining columns.
y = data['quality']
M = remove_cols(data, 'quality')

# Binarize: True wherever quality is below the average quality.
y = y < np.average(y)


# Disabled exploratory step: print each item yielded by describe_cols(M).
# NOTE(review): describe_cols is not imported in the visible part of this
# file; it has to be imported before this branch is enabled.
if False:
    for x in describe_cols(M):
        # Parenthesized form behaves the same on Python 2 and parses on
        # Python 3, where the bare `print x` statement is a SyntaxError.
        print(x)

# Disabled exploratory plots.
if False:
    plot_correlation_scatter_plot(M)
    plot_correlation_matrix(M)
    plot_kernel_density(M['f0'])  # no designation of col name
Example #5
0
import numpy as np
import sklearn.datasets

from diogenes.read import open_csv_url
from diogenes.display import (plot_correlation_scatter_plot,
                              plot_correlation_matrix, plot_kernel_density,
                              plot_box_plot)

from diogenes.grid_search import Experiment
from diogenes.grid_search import std_clfs as std_clfs
from diogenes.utils import remove_cols

# Load the white-wine-quality data set (semicolon-delimited CSV) from the
# UCI archive.
data = open_csv_url(
    'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv',
    delimiter=';')

# Split the 'quality' column off from the remaining columns.
y = data['quality']
M = remove_cols(data, 'quality')

# Binarize: True wherever quality is below the average quality.
y = y < np.average(y)

# Disabled exploratory step: print each item yielded by describe_cols(M).
# NOTE(review): describe_cols is not imported in the visible part of this
# file; it has to be imported before this branch is enabled.
if False:
    for x in describe_cols(M):
        # Parenthesized form behaves the same on Python 2 and parses on
        # Python 3, where the bare `print x` statement is a SyntaxError.
        print(x)

# Disabled exploratory plots.
if False:
    plot_correlation_scatter_plot(M)
    plot_correlation_matrix(M)
    plot_kernel_density(M['f0'])  # no designation of col name
    plot_box_plot(M['f0'])  # no designation of col name

# Build an Experiment from the remaining columns and the binarized
# quality values, passing std_clfs as its `clfs` argument.
exp = Experiment(M, y, clfs=std_clfs)