def test_filelike(self):
    """Test reading from file-like object (StringIO).

    loadarff must accept both a real file handle and an in-memory
    StringIO and produce identical data and metadata for the same file.
    """
    # Context managers close the handles even if loadarff raises; the
    # original leaked both f1 and the anonymous open(test1) handle.
    with open(test1) as f1:
        data1, meta1 = loadarff(f1)
    with open(test1) as f2:
        data2, meta2 = loadarff(StringIO(f2.read()))
    assert_(data1 == data2)
    assert_(repr(meta1) == repr(meta2))
Example #2
0
 def test_filelike(self):
     """Test reading from file-like object (StringIO)"""
     # with-statements guarantee both handles are closed even on error;
     # the original never closed f1 nor the inner open(test1) handle.
     with open(test1) as f1:
         data1, meta1 = loadarff(f1)
     with open(test1) as f2:
         data2, meta2 = loadarff(StringIO(f2.read()))
     assert_(data1 == data2)
     assert_(repr(meta1) == repr(meta2))
Example #3
0
 def test_filelike(self):
     # loadarff must give the same result for a real file handle and
     # for the same contents wrapped in a StringIO.
     with open(test1) as handle:
         data_from_file, meta_from_file = loadarff(handle)
     with open(test1) as handle:
         buffered = StringIO(handle.read())
     data_from_buf, meta_from_buf = loadarff(buffered)
     assert_(data_from_file == data_from_buf)
     assert_(repr(meta_from_file) == repr(meta_from_buf))
 def test_filelike(self):
     # Test reading from file-like object (StringIO)
     # Context managers replace the manual open()/close() pairs, which
     # were not exception-safe: a raise inside loadarff leaked the handle.
     with open(test1) as f1:
         data1, meta1 = loadarff(f1)
     with open(test1) as f2:
         data2, meta2 = loadarff(StringIO(f2.read()))
     assert_(data1 == data2)
     assert_(repr(meta1) == repr(meta2))
Example #5
0
 def test_filelike(self):
     # Test reading from file-like object (StringIO)
     # Use with-blocks instead of explicit close(): if loadarff (or
     # read()) raises, the explicit close() was skipped and the handle
     # leaked.
     with open(test1) as f1:
         data1, meta1 = loadarff(f1)
     with open(test1) as f2:
         data2, meta2 = loadarff(StringIO(f2.read()))
     assert_(data1 == data2)
     assert_(repr(meta1) == repr(meta2))
Example #6
0
    def test_path(self):
        """Check that loadarff accepts a pathlib.Path like a file handle."""
        from pathlib import Path

        with open(test1) as handle:
            data_from_handle, meta_from_handle = loadarff(handle)

        data_from_path, meta_from_path = loadarff(Path(test1))

        assert_(data_from_handle == data_from_path)
        assert_(repr(meta_from_handle) == repr(meta_from_path))
    def test_path(self):
        # loadarff should treat a pathlib.Path exactly like an open file.
        from pathlib import Path

        with open(test1) as fh:
            expected = loadarff(fh)

        actual = loadarff(Path(test1))

        assert_(expected[0] == actual[0])
        assert_(repr(expected[1]) == repr(actual[1]))
Example #8
0
 def test_nodata(self):
     # nodata.arff has an empty @DATA section; loadarff should still
     # report the full declared dtype but yield a zero-length array.
     data, meta = loadarff(os.path.join(data_path, 'nodata.arff'))
     fields = [('sepallength', '<f8'), ('sepalwidth', '<f8'),
               ('petallength', '<f8'), ('petalwidth', '<f8'),
               ('class', 'S15')]
     assert_equal(data.dtype, np.dtype(fields))
     assert_equal(data.size, 0)
Example #9
0
 def test_nodata(self):
     # An empty @DATA section must produce a size-0 array whose dtype
     # still reflects every attribute declared in the header.
     nodata_filename = os.path.join(data_path, 'nodata.arff')
     data, meta = loadarff(nodata_filename)
     float_names = ['sepallength', 'sepalwidth', 'petallength', 'petalwidth']
     expected_dtype = np.dtype(
         [(name, '<f8') for name in float_names] + [('class', 'S15')])
     assert_equal(data.dtype, expected_dtype)
     assert_equal(data.size, 0)
Example #10
0
def characterize(file_name):
    """Load an ARFF dataset, fit a decision tree on a train split, and
    log the best feature-selection result for each evaluator.

    Parameters
    ----------
    file_name : str
        Path to the ARFF file to load.
    """
    frame = pd.DataFrame(arffread.loadarff(file_name)[0])
    frame = numerify_columns(frame)
    # 'LEAVE' is the target column; everything else is a feature.
    y = frame['LEAVE']
    X = frame.loc[:, frame.columns != 'LEAVE']
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, train_size=0.5)
    decision_tree(X_train, y_train)

    min_cols, max_cols = 3, len(X_train.columns)
    for evaluator in (evaluate_by_rank, evaluate_by_pca):
        best_evaluated = best_ranker_accuracy(X_train, y_train, evaluator,
                                              min_cols, max_cols)
        log.info(f"Best {evaluator}: {best_evaluated}")
Example #11
0
 def test_missing(self):
     # Each attribute with missing entries must still compare (almost)
     # equal to its reference array.
     data, meta = loadarff(missing)
     for field in ('yop', 'yap'):
         assert_array_almost_equal(data[field], expect_missing[field])
Example #12
0
 def _test(self, test_file):
     # Compare every parsed value against the expect4_data fixture and
     # verify the attribute types survive into the metadata.
     data, meta = loadarff(test_file)
     for row_idx, row in enumerate(data):
         for col_idx in range(4):
             assert_array_almost_equal(expect4_data[row_idx][col_idx],
                                       row[col_idx])
     assert_equal(meta.types(), expected_types)
Example #13
0
 def setup_method(self):
     # pytest-style per-test setup: parse the quoted-nominal-with-spaces
     # ARFF fixture and expose the result as self.data / self.meta.
     self.data, self.meta = loadarff(test_quoted_nominal_spaces)
Example #14
0
 def setup_method(self):
     # pytest-style per-test setup: parse the test10 ARFF fixture and
     # expose the result as self.data / self.meta.
     self.data, self.meta = loadarff(test10)
 def setup_method(self):
     # Duplicate of the setup above: load test10 and stash the parsed
     # data and metadata on the instance for the test methods.
     self.data, self.meta = loadarff(test10)
Example #16
0
 def setUp(self):
     # unittest-style setup: parse the test7 ARFF fixture and expose it
     # as self.data / self.meta.
     self.data, self.meta = loadarff(test7)
Example #17
0
 def setUp(self):
     # Duplicate unittest-style setup: load test7 once per test and
     # stash the parsed data and metadata on the instance.
     self.data, self.meta = loadarff(test7)
Example #18
0
 def _test(self, test_file):
     # Every value in the first four columns must match the fixture;
     # the metadata must also report the expected attribute types.
     data, meta = loadarff(test_file)
     n_rows, n_cols = len(data), 4
     for i in range(n_rows):
         row, expected_row = data[i], expect4_data[i]
         for j in range(n_cols):
             assert_array_almost_equal(expected_row[j], row[j])
     assert_equal(meta.types(), expected_types)
Example #19
0
 def test_missing(self):
     # Both attributes containing missing values must match their
     # reference arrays to within floating-point tolerance.
     data, meta = loadarff(missing)
     assert_array_almost_equal(data['yop'], expect_missing['yop'])
     assert_array_almost_equal(data['yap'], expect_missing['yap'])
Example #20
0
import pandas as pd
import numpy as np
from scipy.io.arff import arffread
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold, cross_val_score
from sklearn.feature_selection import RFE
from sklearn.pipeline import Pipeline

if __name__ == '__main__':
    a = arffread.loadarff('../data/churn.arff')
    churn = pd.DataFrame(a[0])
    type_map = {}
    for c in churn.columns:
        if churn[c].dtype.name == 'object':
            churn[c] = churn[c].apply(lambda x: x.decode('utf8'))
            type_map[c] = ['empty'] + list(churn[c].unique())
            churn.loc[churn[c].isna(), c] = type_map[c][0]
            churn[c] = churn[c].apply(lambda l: type_map[c].index(l))
            churn[c].astype(int)
    X, y = churn.loc[:, churn.columns != 'LEAVE'], churn['LEAVE']
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.3,
                                                        train_size=0.5)
    classer = DecisionTreeClassifier()
    print(f"churn columns: {churn.columns}")
    result = classer.fit(X_train, y_train)
    print(f"CLASSER RESULT: {result}")
    important_cols = [
        c for i, c in enumerate(X.columns)
        if result.feature_importances_[i] > 0.07