# Wrapper around an AutoKeras StructuredDataClassifier that works around the
# search object itself not being picklable: on save, only the best Keras
# model is exported, and the wrapper is pickled separately.
import pickle
from typing import Any, Dict

import numpy as np
from tensorflow import keras
from autokeras import StructuredDataClassifier


class AutoKerasBaselineModel(Model):

    def __init__(
        self,
        name: str,
        model_params: Dict[str, Any],
    ) -> None:
        super().__init__(name, model_params)
        # self._fit_params = model_params.pop('fit_params')
        self._fit_params = {}
        self._model = StructuredDataClassifier(**model_params)

    def _force_fit(self, X: np.ndarray, y: np.ndarray) -> None:
        self._model.fit(X, y, **self._fit_params)

    def fit(self, X: np.ndarray, y: np.ndarray) -> None:
        if not isinstance(self._model, StructuredDataClassifier):
            raise RuntimeError(
                'Because AutoKeras itself cannot be saved, saving this'
                ' model stores only the best Keras model. Calling fit'
                ' would fit that pipeline rather than rerun the'
                ' AutoKeras search. If this is the desired behaviour,'
                ' please use `_force_fit` instead.')
        self._force_fit(X, y)

    def save(self, path: str) -> None:
        # Export and save the best Keras model found by the search
        best_model = self._model.export_model()
        best_model.save(path)

        # Also store the wrapper after removing the AutoKeras model,
        # as it cannot be pickled
        self._model = None
        wrapper_path = path + '.wrapper'
        with open(wrapper_path, 'wb') as file:
            pickle.dump(self, file)

    @classmethod
    def load(cls, path: str):
        # The wrapper and the underlying Keras model have to be loaded
        # separately, as the AutoKeras model cannot be saved
        wrapper_path = path + '.wrapper'
        with open(wrapper_path, 'rb') as file:
            wrapper_class = pickle.load(file)

        wrapper_class._model = keras.models.load_model(path)
        return wrapper_class

    def predict(self, X: np.ndarray) -> np.ndarray:
        if isinstance(self._model, StructuredDataClassifier):
            return self._model.predict(X)
        else:
            # Already exported: round the raw Keras outputs to class labels
            return np.round(self._model.predict(X))

    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        if isinstance(self._model, StructuredDataClassifier):
            # Export to reach the underlying Keras model, whose predict
            # returns probabilities rather than class labels
            exported_model = self._model.export_model()
            return exported_model.predict(X)
        else:
            # Already exported
            return self._model.predict(X)
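# A minimal usage sketch of the wrapper above, on random data. Names, paths
# and the (name, model_params) signature of the `Model` base class are
# assumptions, not part of the original snippet.
import numpy as np

X_demo = np.random.rand(100, 8).astype('float32')
y_demo = np.random.randint(0, 2, size=100)

wrapper = AutoKerasBaselineModel('baseline', {'max_trials': 1})
wrapper.fit(X_demo, y_demo)
wrapper.save('baseline_model')  # writes the Keras model plus 'baseline_model.wrapper'

restored = AutoKerasBaselineModel.load('baseline_model')
labels = restored.predict(X_demo)  # rounded Keras outputs once exported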
# End-to-end helper: load a CSV, run an AutoKeras search, evaluate,
# predict on new data, and save the best model.
from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from autokeras import StructuredDataClassifier


def auto_ml_classify(url, x_names, y_name, max_trials, test_size, X_new):
    # load dataset
    dataframe = read_csv(url)
    X = dataframe[x_names].values
    y = dataframe[y_name].values
    print(X.shape, y.shape)

    # basic data preparation
    X = X.astype('float32')
    y = LabelEncoder().fit_transform(y)

    # separate into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=1)
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

    # define the search
    search = StructuredDataClassifier(max_trials=max_trials)

    # perform the search
    search.fit(x=X_train, y=y_train, verbose=0)

    # evaluate the model
    loss, acc = search.evaluate(X_test, y_test, verbose=0)
    print('Accuracy: %.3f' % acc)

    # use the model to make a prediction
    yhat = search.predict(X_new)
    print('Predicted: %.3f' % yhat[0])

    # get the best performing model
    model = search.export_model()

    # summarize the loaded model
    model.summary()

    # save the best performing model so it does not have to be retrained next time
    model.save('model_sonar.h5')
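# A hypothetical smoke test of the helper above: write a tiny synthetic CSV
# and run a one-trial search on it. The file name and column names are
# placeholders, not part of the original snippet.
import numpy as np
import pandas as pd

demo = pd.DataFrame({
    'feat_1': np.random.rand(50),
    'feat_2': np.random.rand(50),
    'label': np.random.randint(0, 2, size=50),
})
demo.to_csv('demo.csv', index=False)

auto_ml_classify(
    url='demo.csv',
    x_names=['feat_1', 'feat_2'],
    y_name='label',
    max_trials=1,
    test_size=0.33,
    X_new=np.asarray([[0.5, 0.5]], dtype='float32'),
)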
# Set the seeds for reproducible results with TF (only deterministic on CPU,
# not GPU). Assumes X_train, X_test, y_train, max_trials and epochs are
# defined earlier.
import io

import numpy as np
import tensorflow as tf
from autokeras import StructuredDataClassifier

np.random.seed(2)

# Force TensorFlow to use a single thread
session_conf = tf.compat.v1.ConfigProto(
    intra_op_parallelism_threads=1,
    inter_op_parallelism_threads=1)
sess = tf.compat.v1.Session(
    graph=tf.compat.v1.get_default_graph(), config=session_conf)
tf.compat.v1.keras.backend.set_session(sess)

# define the search
search = StructuredDataClassifier(max_trials=max_trials)

# perform the search
search.fit(x=X_train, y=y_train, verbose=0, epochs=epochs)

y_pred_train = search.predict(X_train)
y_pred_test = search.predict(X_test)

# get the best performing model
model = search.export_model()

# capture the model summary as a string
s = io.StringIO()
model.summary(print_fn=lambda x: s.write(x + '\n'))
model_summary = s.getvalue()
s.close()
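# A TF2-native alternative sketch to the compat-mode session pinning above
# (an assumption on my part, not from the original snippet; requires
# TF >= 2.7 and must run before any TensorFlow op initializes the runtime).
import tensorflow as tf

tf.keras.utils.set_random_seed(2)  # seeds Python, NumPy and TensorFlow at once
tf.config.threading.set_intra_op_parallelism_threads(1)
tf.config.threading.set_inter_op_parallelism_threads(1)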
# Titanic-style example: pass explicit column types to the search.
# Assumes X and y have been loaded from the dataset beforehand.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from autokeras import StructuredDataClassifier

# basic data preparation
# X = X.astype('float32')
y = LabelEncoder().fit_transform(y)

# separate into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=1)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

# define the search
search = StructuredDataClassifier(
    max_trials=100,
    column_types={
        'Pclass': 'categorical',
        'Sex': 'categorical',
        'Age': 'numerical',
        'SibSp': 'categorical',
        'Parch': 'numerical',
        'Ticket': 'categorical',
        'Fare': 'numerical',
        'Cabin': 'categorical',
        'Embarked': 'categorical',
    })

# perform the search
search.fit(x=X_train, y=y_train, verbose=0)

# evaluate the model
loss, acc = search.evaluate(X_test, y_test, verbose=0)
print('Accuracy: %.3f' % acc)
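# Note: when column_types is omitted, AutoKeras infers each column's type
# from the data; an explicit mapping like the one above overrides that
# inference, and its keys should match the training data's column names.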
# Breast cancer (Wisconsin) example: search, evaluate, save and plot.
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from autokeras import StructuredDataClassifier

df = pd.read_csv('/content/drive/MyDrive/breast-cancer-wisconsin.data.txt')

# basic data preparation
X = np.array(df.drop(['class'], axis=1))  # input
# X = X.astype('float32')
y = np.array(df['class'])  # output

# integer encode the labels
y = LabelEncoder().fit_transform(y)

# look at the dataset again
print(f'Number of Rows: {df.shape[0]}')
print(f'Number of Columns: {df.shape[1]}')
print(df.head())

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

search = StructuredDataClassifier(max_trials=5)
search.fit(x=X_train, y=y_train, verbose=1)

loss, acc = search.evaluate(X_test, y_test, verbose=0)  # classification metrics
print('Accuracy: %.3f' % acc)
print('loss: %.3f' % loss)

y_predictions = search.predict(X_test)

model = search.export_model()
model.summary()
print("\n%s: %.2f%%" % (model.metrics_names[1], acc * 100))
model.save('breast_cancer_model.tf')

from keras.utils.vis_utils import plot_model
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)
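# Note: plot_model needs the pydot and graphviz packages installed; on
# recent Keras versions the import path is keras.utils.plot_model.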
# Sonar example: search, evaluate, and predict a single new row.
# Assumes `dataframe` has been loaded earlier, e.g. with read_csv(...).
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from autokeras import StructuredDataClassifier

print(dataframe.shape)

# split into input and output elements
data = dataframe.values
X, y = data[:, :-1], data[:, -1]
print(X.shape, y.shape)

# basic data preparation
X = X.astype('float32')
y = LabelEncoder().fit_transform(y)

# separate into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=1)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

# define the search
search = StructuredDataClassifier(max_trials=15)

# perform the search
search.fit(x=X_train, y=y_train, verbose=0)

# evaluate the model
loss, acc = search.evaluate(X_test, y_test, verbose=0)
print('Accuracy: %.3f' % acc)

# use the model to make a prediction
row = [
    0.0200, 0.0371, 0.0428, 0.0207, 0.0954, 0.0986, 0.1539, 0.1601,
    0.3109, 0.2111, 0.1609, 0.1582, 0.2238, 0.0645, 0.0660, 0.2273,
    0.3100, 0.2999, 0.5078, 0.4797, 0.5783, 0.5071, 0.4328, 0.5550,
    0.6711, 0.6415, 0.7104, 0.8080, 0.6791, 0.3857, 0.1307, 0.2604,
    0.5121, 0.7547, 0.8537, 0.8507, 0.6692, 0.6097, 0.4943, 0.2744,
    0.0510, 0.2834, 0.2825, 0.4256, 0.2641, 0.1386, 0.1051, 0.1343,
    0.0383, 0.0324, 0.0232, 0.0027, 0.0065, 0.0159, 0.0072, 0.0167,
    0.0180, 0.0084, 0.0090, 0.0032
]
X_new = np.asarray([row]).astype('float32')
yhat = search.predict(X_new)
print('Predicted: %.3f' % yhat[0])
print("shape of the input") print(X.shape, y.shape) X = X.astype('float32') y = LabelEncoder().fit_transform(y) # separate into train and test sets X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, stratify=y, random_state=42) print("shape of the splitting Data X_train, X_test, y_train, y_test") print(X_train.shape, X_test.shape, y_train.shape, y_test.shape) # define the search space search = StructuredDataClassifier( max_trials=30, tuner='random', metrics=[tf.keras.metrics.CategoricalAccuracy(), 'accuracy']) # perform the search search.fit(x=X_train, y=y_train, verbose=1) # evaluate the model loss, acc = search.evaluate(X_test, y_test, verbose=0) print('Modell Accuracy') print('Accuracy: %.3f' % acc) # get the best performing model model = search.export_model() # summarize the loaded model model.summary()