예제 #1
0
def run(dataset_train, dataset_test, pop_size, gens, cross_rate, fb, max_time=1200):
    """Fit a Feat estimator on the training split and score it on both splits.

    Both datasets are sliced as ``[:, :-1]`` (features) and ``[:, -1]``
    (target), i.e. the last column is used as the target.

    Args:
        dataset_train: 2-D array-like holding the training rows.
        dataset_test: 2-D array-like holding the held-out rows.
        pop_size: forwarded to ``Feat(pop_size=...)``.
        gens: forwarded to ``Feat(gens=...)``.
        cross_rate: forwarded to ``Feat(cross_rate=...)``.
        fb: forwarded to ``Feat(fb=...)``.
        max_time: forwarded to ``Feat(max_time=...)``; defaults to 1200.

    Returns:
        A 3-tuple ``(train_rmse, test_rmse, model)`` where the first two are
        the values of ``RMSE(prediction, target)`` on each split and the
        last is whatever ``est_gp.get_model()`` returns.
    """
    Xtrain = dataset_train[:, :-1]
    ytrain = dataset_train[:, -1]
    Xtest = dataset_test[:, :-1]
    ytest = dataset_test[:, -1]

    n_features = Xtrain.shape[1]

    feat_settings = dict(
        obj="fitness,complexity",
        pop_size=pop_size,
        gens=gens,
        max_time=max_time,
        max_stall=50,
        batch_size=10000,
        ml="LinearRidgeRegression",
        sel='lexicase',
        surv='nsga2',
        max_depth=10,
        # Twice the number of input features, capped at 50.
        max_dim=min(n_features * 2, 50),
        functions="+,-,*,/,sqrt,sin,cos,tanh,exp,log,^,x,kd",
        otype="f",
        backprop=True,
        iters=10,
        n_threads=1,
        verbosity=1,
        # tuned parameters
        cross_rate=cross_rate,
        fb=fb,
        root_xo_rate=0.75,
        softmax_norm=False,
    )

    est_gp = Feat(**feat_settings)
    est_gp.fit(Xtrain, ytrain)

    train_rmse = RMSE(est_gp.predict(Xtrain), ytrain)
    test_rmse = RMSE(est_gp.predict(Xtest), ytest)
    return train_rmse, test_rmse, est_gp.get_model()
예제 #2
0
파일: wrappertest.py 프로젝트: hsarv/feat
    def test_saving_loading(self):
        """Save a fitted estimator to JSON, load it back and compare the two.

        Checks, in order: predictions agree to within 1e-4, the
        representation / model strings and coefficients are equal, every
        parameter of the original is present in the loaded estimator with
        an equal value (floats to within 1e-4), and finally that the
        loaded estimator can be fitted again.
        """
        self.debug("Pickle Feat object")

        fitted = clone(self.reg)
        fitted.fit(self.X, self.yr)
        preds_before = fitted.predict(self.X)
        fitted.save('Feat_tmp.json')

        restored = Feat().load('Feat_tmp.json')
        preds_after = restored.predict(self.X)

        # Element-wise comparison so the offending row can be reported.
        for idx, gap in enumerate(np.abs(preds_before - preds_after)):
            if gap > 0.0001:
                print('pred:', preds_before[idx], 'loaded:', preds_after[idx],
                      'diff:', gap)
            assert gap < 0.0001

        assert fitted.get_representation() == restored.get_representation()
        assert fitted.get_model() == restored.get_model()
        assert (fitted.get_coefs() == restored.get_coefs()).all()

        restored_params = restored.get_params()
        for name, expected in fitted.get_params().items():
            if name not in restored_params.keys():
                print(name, 'not in ', restored_params.keys())
                assert name in restored_params.keys()

            if isinstance(expected, float):
                # Floats are compared with a tolerance rather than exactly.
                gap = np.abs(restored_params[name] - expected)
                if gap > 0.0001:
                    print('loaded_params[', name, '] =',
                          restored_params[name],
                          '\nwhich is different from:', expected)
                assert gap < 0.0001
            elif restored_params[name] != expected:
                print('loaded_params[', name, '] =',
                      restored_params[name],
                      '\nwhich is different from:', expected)
                assert restored_params[name] == expected

        # The loaded estimator must still be trainable.
        restored.fit(self.X, self.yr)
예제 #3
0
class TestFeatWrapper(unittest.TestCase):
    """Smoke tests for the Python ``Feat`` estimator wrapper.

    Each test runs against the data returned by ``load_diabetes()`` with a
    fresh single-threaded estimator created in ``setUp``.
    """

    def setUp(self):
        """Build the estimator under test and load the feature/target arrays."""
        self.v = verbosity
        self.clf = Feat(verbosity=verbosity, n_threads=1)
        diabetes = load_diabetes()
        self.X = diabetes.data
        self.y = diabetes.target

    def _binary_target(self):
        """Return 0/1 integer labels: 1 where ``self.y`` exceeds its median."""
        # ``np.int`` was only an alias of the builtin ``int`` and has been
        # removed from NumPy, so the builtin is used for the dtype.
        return np.array(self.y > np.median(self.y), dtype=int)

    # Test 1: Assert the length of labels returned from predict
    def test_predict_length(self):
        """predict() returns one value per training target."""
        self.debug("Fit the Data")
        self.clf.fit(self.X, self.y)

        self.debug("Predicting the Results")
        pred = self.clf.predict(self.X)

        self.debug("Comparing the Length of labls in Predicted vs Actual ")
        expected_length = len(self.y)
        actual_length = len(pred)
        self.assertEqual(actual_length, expected_length)

    # Test 2: Assert the length of labels returned from fit_predict
    def test_fitpredict_length(self):
        """fit_predict() returns one value per training target."""
        self.debug("Calling fit_predict from Feat")
        pred = self.clf.fit_predict(self.X, self.y)

        self.debug("Comparing the length of labls in fit_predict vs actual ")
        expected_length = len(self.y)
        actual_length = len(pred)
        self.assertEqual(actual_length, expected_length)

    # Test 3: Assert the number of rows returned from transform
    def test_transform_length(self):
        """transform() keeps the number of rows of the input."""
        self.debug("Calling fit")
        self.clf.fit(self.X, self.y)
        trans_X = self.clf.transform(self.X)

        self.debug(
            "Comparing the length of labls in transform vs actual feature set "
        )
        expected_value = self.X.shape[0]
        actual_value = trans_X.shape[0]
        self.assertEqual(actual_value, expected_value)

    # Test 4: Assert the number of rows returned from fit_transform
    def test_fit_transform_length(self):
        """fit_transform() keeps the number of rows of the input."""
        self.debug("In wrappertest.py...Calling fit transform")
        trans_X = self.clf.fit_transform(self.X, self.y)

        self.debug(
            "Comparing the length of labls in transform vs actual feature set "
        )
        expected_value = self.X.shape[0]
        actual_value = trans_X.shape[0]
        self.assertEqual(actual_value, expected_value)

    # Test 5: Transform with Z
    def test_transform_length_z(self, zfile=None, zids=None):
        """transform() with the extra ``zfile``/``zids`` arguments keeps row count.

        The test runner calls this without arguments, so both default to
        ``None`` and are forwarded as such.
        """
        self.debug("Calling fit")
        self.clf.fit(self.X, self.y)
        trans_X = self.clf.transform(self.X, zfile, zids)

        self.debug(
            "Comparing the length of labls in transform vs actual feature set "
        )
        expected_value = self.X.shape[0]
        actual_value = trans_X.shape[0]
        self.assertEqual(actual_value, expected_value)

    def debug(self, message):
        """Print ``message`` when the configured verbosity is positive."""
        if self.v > 0:
            print(message)

    def test_coefs(self):
        """A fitted estimator exposes a non-empty coefficient collection."""
        self.debug("In wrappertest.py...Calling test_coefs")
        self.clf.fit(self.X, self.y)
        coefs = self.clf.get_coefs()
        self.assertTrue(len(coefs) > 0)

    def test_dataframe(self):
        """Fitting on a DataFrame records its column names as feature names."""
        self.debug("In wrappertest.py...Calling test_dataframe")
        dfX = pd.DataFrame(
            data=self.X,
            columns=['fishy' + str(i) for i in np.arange(self.X.shape[1])],
            index=None)
        dfy = pd.DataFrame(data={'label': self.y})

        self.clf.fit(dfX, dfy['label'])
        # unittest assertion instead of a bare assert, which is removed
        # when Python runs with -O.
        self.assertEqual(self.clf.feature_names,
                         ','.join(dfX.columns).encode())

    # Test: every entry of the stats mapping has one value per generation
    def test_predict_stats_length(self):
        """Each series in ``clf.stats`` has exactly ``clf.gens`` entries."""
        self.debug("Fit the Data")
        self.clf.fit(self.X, self.y)

        for key in self.clf.stats:
            self.assertEqual(len(self.clf.stats[key]), self.clf.gens)

    # Test ability to pickle feat model
    def test_pickling(self):
        """An (unfitted) estimator survives a pickle round trip.

        The round trip is done in memory so the test does not leave a
        ``.pkl`` file behind in the working directory.
        """
        self.debug("Pickle Feat object")

        loaded_clf = pickle.loads(pickle.dumps(self.clf))

        self.assertEqual(loaded_clf.get_params(), self.clf.get_params())

    def test_archive(self):
        """test archiving ability"""
        self.debug("Test archive")

        self.clf.classification = True
        self.clf.ml = b'LR'
        self.clf.fit(self.X, self._binary_target())
        archive = self.clf.get_archive()
        preds = self.clf.predict_archive(self.X)
        probs = self.clf.predict_proba_archive(self.X)

        # Entries of the three sequences are expected to line up by 'id'.
        for arch, pred, prob in zip(archive, preds, probs):
            self.assertTrue(arch['id'] == pred['id'])
            self.assertTrue(arch['id'] == prob['id'])

    def test_lr_l1(self):
        """testing l1 penalized LR"""
        self.clf.classification = True
        self.clf.ml = b'L1_LR'
        self.clf.fit(self.X, self._binary_target())

        self.assertEqual(len(self.clf.predict(self.X)), len(self.y))
예제 #4
0
    pop_size=200,
    # ml='CART',
    ml='LR',
    verbosity=1,
    shuffle=True,
    classification=True,
    backprop=True,
    random_state=42)
# Second estimator fitted on exactly the same folds as `clf`, so the two can
# be compared fold by fold (LR is imported outside this excerpt).
lr = LR()
# Per-fold results: (fpr, tpr) pairs and AUC values, for clf and for lr.
rocs = []
aucs = []
lr_rocs = []
lr_aucs = []

for train_idx, test_idx in kf.split(X):
    # Fit both estimators on the training rows of this fold.
    clf.fit(X[train_idx], y[train_idx])
    lr.fit(X[train_idx], y[train_idx])

    # Score the held-out rows of this fold.
    probabilities = clf.predict_proba(X[test_idx])
    lr_probabilities = lr.predict_proba(X[test_idx])

    # Column 1 of predict_proba is used as the ranking score -- presumably
    # the positive-class probability; confirm the class ordering.
    # The third value returned by roc_curve is not needed here.
    fpr, tpr, _ = roc_curve(y[test_idx], probabilities[:, 1])
    lr_fpr, lr_tpr, _ = roc_curve(y[test_idx], lr_probabilities[:, 1])

    aucs.append(auc(fpr, tpr))
    lr_aucs.append(auc(lr_fpr, lr_tpr))

    rocs.append((fpr, tpr))
    lr_rocs.append((lr_fpr, lr_tpr))

import matplotlib.pyplot as plt
예제 #5
0
import numpy as np

from feat import Feat
from sklearn.model_selection import KFold

# Load the example table, discard the 'id' column and split the remaining
# columns into the feature matrix X and the 'class' target y.
df = pd.read_csv('d_example_patients.csv')
df.drop('id', axis=1, inplace=True)
X = df.drop('class', axis=1).values
y = df['class'].values
# Path of a second CSV that is handed to fit/score together with the row
# indices of each fold (presumably per-patient longitudinal data -- confirm
# against the Feat documentation).
zfile = 'd_example_patients_long.csv'
kf = KFold(n_splits=3)
# NOTE(review): the return value is discarded, so this call has no visible
# effect in this script.
kf.get_n_splits(X)

# max_dim is twice the number of input columns, capped at 50.
clf = Feat(
    max_depth=5,
    max_dim=min(50, 2 * X.shape[1]),
    verbosity=1,
    shuffle=True,
    ml='LR',
    classification=True,
    functions=
    "max,+,-,*,/,exp,log,and,or,not,=,<,>,ite,mean,median,min,variance,skew,kurtosis,slope,count",
    random_state=42)
# One score per fold, in the order produced by kf.split.
scores = []
for train_idx, test_idx in kf.split(X):
    # The fold's row indices are passed along with zfile in both calls.
    clf.fit(X[train_idx], y[train_idx], zfile, train_idx)
    scores.append(clf.score(X[test_idx], y[test_idx], zfile, test_idx))

print('scores:', scores)
예제 #6
0
class TestFeatWrapper(unittest.TestCase):
    """Length/shape sanity checks for the ``Feat`` estimator wrapper."""

    def setUp(self):
        """Create the estimator and load the data used by every test."""
        self.v = verbosity
        self.clf = Feat(verbosity=self.v)
        dataset = load_diabetes()
        self.X, self.y = dataset.data, dataset.target

    # Test 1: Assert the length of labels returned from predict
    def test_predict_length(self):
        """predict() yields as many values as there are targets."""
        self.debug("Fit the Data")
        self.clf.fit(self.X, self.y)

        self.debug("Predicting the Results")
        predictions = self.clf.predict(self.X)

        self.debug("Comparing the Length of labls in Predicted vs Actual ")
        self.assertEqual(len(predictions), len(self.y))

    # Test 2: Assert the length of labels returned from fit_predict
    def test_fitpredict_length(self):
        """fit_predict() yields as many values as there are targets."""
        self.debug("Calling fit_predict from Feat")
        predictions = self.clf.fit_predict(self.X, self.y)

        self.debug("Comparing the length of labls in fit_predict vs actual ")
        self.assertEqual(len(predictions), len(self.y))

    # Test 3: Assert the number of rows returned from transform
    def test_transform_length(self):
        """transform() returns one row per input row."""
        self.debug("Calling fit")
        self.clf.fit(self.X, self.y)
        transformed = self.clf.transform(self.X)

        self.debug("Comparing the length of labls in transform vs actual feature set ")
        self.assertEqual(transformed.shape[0], self.X.shape[0])

    # Test 4: Assert the number of rows returned from fit_transform
    def test_fit_transform_length(self):
        """fit_transform() returns one row per input row."""
        self.debug("In wrappertest.py...Calling fit transform")
        transformed = self.clf.fit_transform(self.X, self.y)

        self.debug("Comparing the length of labls in transform vs actual feature set ")
        self.assertEqual(transformed.shape[0], self.X.shape[0])

    # Test 5: Transform with Z
    def test_transform_length_z(self, zfile=None, zids=None):
        """transform() with ``zfile``/``zids`` forwarded returns one row per input row."""
        self.debug("Calling fit")
        self.clf.fit(self.X, self.y)
        transformed = self.clf.transform(self.X, zfile, zids)

        self.debug("Comparing the length of labls in transform vs actual feature set ")
        self.assertEqual(transformed.shape[0], self.X.shape[0])

    def debug(self, message):
        """Print ``message`` only when verbosity is above zero."""
        if self.v > 0:
            print(message)

    def test_coefs(self):
        """get_coefs() on a fitted estimator is non-empty."""
        self.debug("In wrappertest.py...Calling test_coefs")
        self.clf.fit(self.X, self.y)
        coefficients = self.clf.get_coefs()
        print('coefs:', coefficients)
        self.assertTrue(len(coefficients) > 0)
예제 #7
0
import pandas as pd
from pmlb import fetch_data

# Read the tab-separated table; the 'class' column is the target and all
# other columns form the feature matrix.
df = pd.read_csv('mnist.csv', sep='\t')
print(df.columns)
X = df.drop('class', axis=1).values
y = df['class'].values

from feat import Feat

ft = Feat(classification=True, verbosity=2)

# Train on the first 60000 rows ...
ft.fit(X[:60000], y[:60000])

# ... and report the score on all remaining rows.
print(ft.score(X[60000:], y[60000:]))
예제 #8
0
    pop_size=100,
    verbosity=1,
    shuffle=True,
    ml='LR',
    classification=True,
    feature_names=','.join(df.drop('class', axis=1).columns),
    functions="+,-,*,/,exp,log,and,or,not,=,<,<=,>,>=,ite,split,split_c,"
    "mean,median,max,min,variance,skew,kurtosis,slope,count",
    backprop=True,
    iters=10,
    random_state=42)
# One score per cross-validation fold, in the order produced by kf.split.
scores = []

for train_idx, test_idx in kf.split(X, y):
    # print('train_idx:',train_idx)
    # The fold's row indices are passed along with zfile in both calls.
    clf.fit(X[train_idx], y[train_idx], zfile, train_idx)
    scores.append(clf.score(X[test_idx], y[test_idx], zfile, test_idx))

print('scores:', scores)

###################################################################################################
# fit to all data
###################################################################################################

print('fitting longer to all data...')
# Reuse the same estimator with gens set to 20 and verbosity raised to 2,
# then fit on every row (indices 0..len(X)-1 are passed with zfile).
clf.gens = 20
clf.verbosity = 2
clf.fit(X, y, zfile, np.arange(len(X)))
print('model:', clf.get_model())

##################################################################################################
예제 #9
0
import pandas as pd

import numpy as np

from feat import Feat
import sys
# The seed is taken from the first command-line argument. sys.argv entries
# are always strings, so it is converted to an integer before being handed
# to the estimator as random_state.
seed = int(sys.argv[1])

# Load the heart data; 'class' is the target, all other columns are features.
df = pd.read_csv('../examples/d_heart.csv', sep=',')
# NOTE(review): the summary is computed but not printed or stored.
df.describe()
X = df.drop('class', axis=1).values
y = df['class'].values
# max_dim=1 with max_depth=3 restricts the search to a single small feature.
clf = Feat(max_depth=3,
           max_dim=1,
           gens=100,
           pop_size=200,
           verbosity=2,
           shuffle=True,
           classification=True,
           functions="+,-,*,/,exp,log,and,or,not,=,<,>,ite",
           random_state=seed,
           softmax_norm=True)
clf.fit(X, y)
예제 #10
0
class TestFeatWrapper(unittest.TestCase):
    """Sanity checks for the ``Feat`` estimator wrapper (single-threaded)."""

    def setUp(self):
        """Create the estimator and load the data shared by all tests."""
        self.v = verbosity
        self.clf = Feat(verbosity=verbosity, n_threads=1)
        dataset = load_diabetes()
        self.X, self.y = dataset.data, dataset.target

    # Test 1: Assert the length of labels returned from predict
    def test_predict_length(self):
        """predict() yields as many values as there are targets."""
        self.debug("Fit the Data")
        self.clf.fit(self.X, self.y)

        self.debug("Predicting the Results")
        predicted = self.clf.predict(self.X)

        self.debug("Comparing the Length of labls in Predicted vs Actual ")
        self.assertEqual(len(predicted), len(self.y))

    # Test 2: Assert the length of labels returned from fit_predict
    def test_fitpredict_length(self):
        """fit_predict() yields as many values as there are targets."""
        self.debug("Calling fit_predict from Feat")
        predicted = self.clf.fit_predict(self.X, self.y)

        self.debug("Comparing the length of labls in fit_predict vs actual ")
        self.assertEqual(len(predicted), len(self.y))

    # Test 3: Assert the number of rows returned from transform
    def test_transform_length(self):
        """transform() returns one row per input row."""
        self.debug("Calling fit")
        self.clf.fit(self.X, self.y)
        features = self.clf.transform(self.X)

        self.debug("Comparing the length of labls in transform vs actual feature set ")
        self.assertEqual(features.shape[0], self.X.shape[0])

    # Test 4: Assert the number of rows returned from fit_transform
    def test_fit_transform_length(self):
        """fit_transform() returns one row per input row."""
        self.debug("In wrappertest.py...Calling fit transform")
        features = self.clf.fit_transform(self.X, self.y)

        self.debug("Comparing the length of labls in transform vs actual feature set ")
        self.assertEqual(features.shape[0], self.X.shape[0])

    # Test 5: Transform with Z
    def test_transform_length_z(self, zfile=None, zids=None):
        """transform() with ``zfile``/``zids`` forwarded returns one row per input row."""
        self.debug("Calling fit")
        self.clf.fit(self.X, self.y)
        features = self.clf.transform(self.X, zfile, zids)

        self.debug("Comparing the length of labls in transform vs actual feature set ")
        self.assertEqual(features.shape[0], self.X.shape[0])

    def debug(self, message):
        """Print ``message`` only when verbosity is above zero."""
        if self.v > 0:
            print(message)

    def test_coefs(self):
        """get_coefs() on a fitted estimator is non-empty."""
        self.debug("In wrappertest.py...Calling test_coefs")
        self.clf.fit(self.X, self.y)
        coefficients = self.clf.get_coefs()
        print('coefs:', coefficients)
        self.assertTrue(len(coefficients) > 0)

    def test_dataframe(self):
        """Fitting on a DataFrame stores its column names, comma-joined, as bytes."""
        self.debug("In wrappertest.py...Calling test_dataframe")
        column_names = ['fishy' + str(i) for i in np.arange(self.X.shape[1])]
        dfX = pd.DataFrame(data=self.X, columns=column_names, index=None)
        dfy = pd.DataFrame(data={'label': self.y})

        self.clf.fit(dfX, dfy['label'])
        assert self.clf.feature_names == ','.join(dfX.columns).encode()

    # Test: every entry of the stats mapping has one value per generation
    def test_predict_stats_length(self):
        """Each series in ``clf.stats`` has exactly ``clf.gens`` entries."""
        self.debug("Fit the Data")
        self.clf.fit(self.X, self.y)

        print("Num generations is ", self.clf.gens)
        for key in self.clf.stats:
            n_entries = len(self.clf.stats[key])
            print("Length for ", key, "is ", n_entries)
            self.assertEqual(n_entries, self.clf.gens)