Example #1
def __init__(
    self,
    name: str,
    model_params: Dict[str, Any],
) -> None:
    super().__init__(name, model_params)
    # self._fit_params = model_params.pop('fit_params')
    self._fit_params = {}
    self._model = StructuredDataClassifier(**model_params)
Example #2
import pickle
from typing import Any, Dict

import numpy as np
from tensorflow import keras
from autokeras import StructuredDataClassifier

# `Model` is the project's own base class; its import is not shown in the source.
class AutoKerasBaselineModel(Model):
    def __init__(
        self,
        name: str,
        model_params: Dict[str, Any],
    ) -> None:
        super().__init__(name, model_params)
        #self._fit_params = model_params.pop('fit_params')
        self._fit_params = {}
        self._model = StructuredDataClassifier(**model_params)

    def _force_fit(self, X: np.ndarray, y: np.ndarray) -> None:
        self._model.fit(X, y, **self._fit_params)

    def fit(self, X: np.ndarray, y: np.ndarray) -> None:
        if not isinstance(self._model, StructuredDataClassifier):
            raise RuntimeError(
                'Because an AutoKeras model cannot be saved, saving this'
                ' wrapper kept only the best exported Keras model.'
                ' Calling fit will fit that Keras model rather than the'
                ' AutoKeras search. If this is the desired behaviour,'
                ' please use `_force_fit` instead.')
        self._force_fit(X, y)

    def save(self, path: str) -> None:
        # Get the best model that can be saved
        best_model = self._model.export_model()
        best_model.save(path)

        # Also store the model wrapper after removing the AutoKeras model,
        # as it can't be saved
        self._model = None
        wrapper_path = path + '.wrapper'
        with open(wrapper_path, 'wb') as file:
            pickle.dump(self, file)

    @classmethod
    def load(cls, path: str):
        # Have to load the model wrapper and the underlying Keras model
        # separately, as the AutoKeras model can't be saved
        wrapper_path = path + '.wrapper'
        with open(wrapper_path, 'rb') as file:
            wrapper_class = pickle.load(file)

        wrapper_class._model = keras.models.load_model(path)
        return wrapper_class

    def predict(self, X: np.ndarray) -> np.ndarray:
        if isinstance(self._model, StructuredDataClassifier):
            return self._model.predict(X)
        else:  # Already been exported
            return np.round(self._model.predict(X))

    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        if isinstance(self._model, StructuredDataClassifier):
            exported_model = self._model.export_model()
            return exported_model.predict(X)
        else:  # Already been exported
            return self._model.predict(X)
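
For orientation, a minimal usage sketch of the wrapper above (toy data and paths are hypothetical; it assumes a `Model` base class with the `__init__(name, model_params)` signature used here):

# Minimal usage sketch: toy data, hypothetical paths, and the project's own
# `Model` base class are assumed.
import numpy as np

X = np.random.rand(100, 8).astype('float32')  # toy features
y = np.random.randint(0, 2, size=100)         # toy binary labels

baseline = AutoKerasBaselineModel('baseline', {'max_trials': 3})
baseline.fit(X, y)                  # runs the AutoKeras search
baseline.save('/tmp/ak_baseline')   # exports best Keras model + pickled wrapper

restored = AutoKerasBaselineModel.load('/tmp/ak_baseline')
print(restored.predict(X)[:5])      # now a plain Keras model, rounded to labels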
Example #3

from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from autokeras import StructuredDataClassifier


def auto_ml_classify(url, x_names, y_name, max_trials, test_size, X_new):
    # load dataset
    dataframe = read_csv(url)
    X = dataframe[x_names].values
    y = dataframe[y_name].values
    print(X.shape, y.shape)
    # basic data preparation
    X = X.astype('float32')
    y = LabelEncoder().fit_transform(y)
    # separate into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=test_size,
                                                        random_state=1)
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
    # define the search
    search = StructuredDataClassifier(max_trials=max_trials)
    # perform the search
    search.fit(x=X_train, y=y_train, verbose=0)
    # evaluate the model
    loss, acc = search.evaluate(X_test, y_test, verbose=0)
    print('Accuracy: %.3f' % acc)
    # use the model to make a prediction
    yhat = search.predict(X_new)
    print('Predicted: %.3f' % yhat[0])
    # get the best performing model
    model = search.export_model()
    # summarize the loaded model
    model.summary()
    # save the best performing model to file so you don't have to retrain next time
    model.save('model_sonar.h5')
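
A hedged call example (the file, column names, and sample row are hypothetical, not from the original source) showing how the function above would be invoked:

# Hypothetical invocation: 'data.csv', the column names and the sample row
# are placeholders, not from the original source.
import numpy as np

X_new = np.asarray([[5.1, 3.5, 1.4, 0.2]]).astype('float32')  # one new sample
auto_ml_classify(url='data.csv',
                 x_names=['f1', 'f2', 'f3', 'f4'],
                 y_name='label',
                 max_trials=5,
                 test_size=0.33,
                 X_new=X_new)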
Example #4
# Fragment from inside a training routine: X_train, y_train, X_test,
# max_trials and epochs come from the enclosing scope.

# set the seeds for reproducible results with TF (won't work on GPU, only CPU)
np.random.seed(2)

session_conf = tf.compat.v1.ConfigProto(
    intra_op_parallelism_threads=1,
    inter_op_parallelism_threads=1)

# Force TensorFlow to use a single thread
sess = tf.compat.v1.Session(
    graph=tf.compat.v1.get_default_graph(),
    config=session_conf)

tf.compat.v1.keras.backend.set_session(sess)

# define the search
search = StructuredDataClassifier(max_trials=max_trials)
# perform the search
search.fit(x=X_train, y=y_train, verbose=0, epochs=epochs)

y_pred_train = search.predict(X_train)
y_pred_test = search.predict(X_test)

# get the best performing model
model = search.export_model()

# capture the model summary as a string
s = io.StringIO()
model.summary(print_fn=lambda x: s.write(x + '\n'))
model_summary = s.getvalue()
s.close()
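
The snippet above uses the TF1 compatibility API; on TF 2.x the same single-threaded, seeded setup can be expressed natively. A minimal sketch of the equivalent intent (not taken from the original source):

# TF2-native equivalent sketch: seed the generators and pin both thread
# pools to one thread before any ops run. Still CPU-only determinism.
import numpy as np
import tensorflow as tf

np.random.seed(2)
tf.random.set_seed(2)
tf.config.threading.set_intra_op_parallelism_threads(1)
tf.config.threading.set_inter_op_parallelism_threads(1)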
Example #5
# basic data preparation
#X = X.astype('float32')
y = LabelEncoder().fit_transform(y)
# separate into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.33,
                                                    random_state=1)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
# define the search
search = StructuredDataClassifier(max_trials=100,
                                  column_types={
                                      'Pclass': 'categorical',
                                      'Sex': 'categorical',
                                      'Age': 'numerical',
                                      'SibSp': 'categorical',
                                      'Parch': 'numerical',
                                      'Ticket': 'categorical',
                                      'Fare': 'numerical',
                                      'Cabin': 'categorical',
                                      'Embarked': 'categorical'
                                  })
# perform the search
search.fit(x=X_train, y=y_train, verbose=0)
# evaluate the model
loss, acc = search.evaluate(X_test, y_test, verbose=0)
print('Accuracy: %.3f' % acc)
# use the model to make a prediction
#row = [0.0200,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,0.1609,0.1582,0.2238,0.0645,0.0660,0.2273,0.3100,0.2999,0.5078,0.4797,0.5783,0.5071,0.4328,0.5550,0.6711,0.6415,0.7104,0.8080,0.6791,0.3857,0.1307,0.2604,0.5121,0.7547,0.8537,0.8507,0.6692,0.6097,0.4943,0.2744,0.0510,0.2834,0.2825,0.4256,0.2641,0.1386,0.1051,0.1343,0.0383,0.0324,0.0232,0.0027,0.0065,0.0159,0.0072,0.0167,0.0180,0.0084,0.0090,0.0032]
#X_new = np.asarray([row]).astype('float32')
#yhat = search.predict(X_new)
#print('Predicted: %.3f' % yhat[0])
Example #6

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from autokeras import StructuredDataClassifier

df = pd.read_csv('/content/drive/MyDrive/breast-cancer-wisconsin.data.txt')
# basic data preparation
X = np.array(df.drop(columns=['class']))  # input features
#X = X.astype('float32')
y = np.array(df['class'])  # output labels
# integer encode
y = LabelEncoder().fit_transform(y)
# Look at the dataset again
print(f'Number of Rows: {df.shape[0]}')
print(f'Number of Columns: {df.shape[1]}')
print(df.head())

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
search = StructuredDataClassifier(max_trials=5)
search.fit(x=X_train, y=y_train, verbose=1)
loss, acc = search.evaluate(X_test, y_test, verbose=0) #classification
print('Accuracy: %.3f' % acc)
print('loss: %.3f' % loss)
y_predictions = search.predict(X_test)
model = search.export_model()
model.summary()
print("\n%s: %.2f%%" % (model.metrics_names[1], acc*100))
model.save('breast_cancer_model.tf')

from tensorflow.keras.utils import plot_model
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)
Example #7

# `data` is assumed to be the dataset as a NumPy array (cf. Example #8)
X, y = data[:, :-1], data[:, -1]
print(X.shape, y.shape)

# basic data preparation
X = X.astype('float32')
y = LabelEncoder().fit_transform(y)

# separate into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.33,
                                                    random_state=1)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

# define the search
search = StructuredDataClassifier(max_trials=15)

# perform the search
search.fit(x=X_train, y=y_train, verbose=0)

# evaluate the model
loss, acc = search.evaluate(X_test, y_test, verbose=0)
print('Accuracy: %.3f' % acc)

# use the model to make a prediction
row = [
    0.0200, 0.0371, 0.0428, 0.0207, 0.0954, 0.0986, 0.1539, 0.1601, 0.3109,
    0.2111, 0.1609, 0.1582, 0.2238, 0.0645, 0.0660, 0.2273, 0.3100, 0.2999,
    0.5078, 0.4797, 0.5783, 0.5071, 0.4328, 0.5550, 0.6711, 0.6415, 0.7104,
    0.8080, 0.6791, 0.3857, 0.1307, 0.2604, 0.5121, 0.7547, 0.8537, 0.8507,
    0.6692, 0.6097, 0.4943, 0.2744, 0.0510, 0.2834, 0.2825, 0.4256, 0.2641,
    0.1386, 0.1051, 0.1343, 0.0383, 0.0324, 0.0232, 0.0027, 0.0065, 0.0159,
    0.0072, 0.0167, 0.0180, 0.0084, 0.0090, 0.0032
]
Example #8
print(dataframe.shape)
# split into input and output elements
data = dataframe.values
X, y = data[:, :-1], data[:, -1]
print(X.shape, y.shape)
# basic data preparation
X = X.astype('float32')
y = LabelEncoder().fit_transform(y)
# separate into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.33,
                                                    random_state=1)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
# define the search
search = StructuredDataClassifier(max_trials=15)
# perform the search
search.fit(x=X_train, y=y_train, verbose=0)
# evaluate the model
loss, acc = search.evaluate(X_test, y_test, verbose=0)
print('Accuracy: %.3f' % acc)
# use the model to make a prediction
row = [
    0.0200, 0.0371, 0.0428, 0.0207, 0.0954, 0.0986, 0.1539, 0.1601, 0.3109,
    0.2111, 0.1609, 0.1582, 0.2238, 0.0645, 0.0660, 0.2273, 0.3100, 0.2999,
    0.5078, 0.4797, 0.5783, 0.5071, 0.4328, 0.5550, 0.6711, 0.6415, 0.7104,
    0.8080, 0.6791, 0.3857, 0.1307, 0.2604, 0.5121, 0.7547, 0.8537, 0.8507,
    0.6692, 0.6097, 0.4943, 0.2744, 0.0510, 0.2834, 0.2825, 0.4256, 0.2641,
    0.1386, 0.1051, 0.1343, 0.0383, 0.0324, 0.0232, 0.0027, 0.0065, 0.0159,
    0.0072, 0.0167, 0.0180, 0.0084, 0.0090, 0.0032
]
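
The snippet stops after building the input row; the prediction step it sets up appears commented out in Example #5 and would read:

# Prediction step, as shown (commented out) in Example #5
X_new = np.asarray([row]).astype('float32')
yhat = search.predict(X_new)
print('Predicted: %.3f' % yhat[0])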
Example #9
print("shape of the input")
print(X.shape, y.shape)
X = X.astype('float32')
y = LabelEncoder().fit_transform(y)
# separate into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.33,
                                                    stratify=y,
                                                    random_state=42)
print("shape of the splitting Data X_train, X_test, y_train, y_test")
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

# define the search space
search = StructuredDataClassifier(
    max_trials=30,
    tuner='random',
    metrics=[tf.keras.metrics.CategoricalAccuracy(), 'accuracy'])

# perform the search
search.fit(x=X_train, y=y_train, verbose=1)

# evaluate the model
loss, cat_acc, acc = search.evaluate(X_test, y_test, verbose=0)
print('Model accuracy')
print('Accuracy: %.3f' % acc)

# get the best performing model
model = search.export_model()

# summarize the loaded model
model.summary()
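
Reloading an exported AutoKeras model later requires AutoKeras's custom objects; a minimal sketch (the file name is hypothetical):

# Hedged reload sketch: exported AutoKeras models contain custom layers, so
# load_model needs autokeras.CUSTOM_OBJECTS to deserialize them.
import autokeras as ak
from tensorflow.keras.models import load_model

model.save('model_autokeras')  # hypothetical path, TF SavedModel format
loaded = load_model('model_autokeras', custom_objects=ak.CUSTOM_OBJECTS)
loaded.summary()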