def auto_ml_classify(url, x_names, y_name, max_trials, test_size, X_new,
                     model_path='model_sonar.h5'):
    """Run an AutoKeras structured-data classifier search on a CSV dataset.

    The CSV at ``url`` is loaded, the feature columns are cast to float32,
    the target column is label-encoded, and the data is split into train
    and test sets. The best model found is evaluated on the test set, used
    to predict ``X_new``, summarized, and saved to ``model_path``.
    Progress (shapes, accuracy, first prediction) is printed to stdout.

    Args:
        url: Path or URL passed to ``read_csv``.
        x_names: Column label(s) selecting the input features.
        y_name: Column label selecting the target.
        max_trials: Number of trials for ``StructuredDataClassifier``.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        X_new: Samples to predict with the fitted search.
        model_path: Where the exported model is saved. Defaults to
            ``'model_sonar.h5'``, the path that used to be hard-coded.

    Returns:
        None.
    """
    # load dataset
    dataframe = read_csv(url)
    X = dataframe[x_names].values
    y = dataframe[y_name].values
    print(X.shape, y.shape)
    # basic data preparation: float32 features, integer-encoded labels
    X = X.astype('float32')
    y = LabelEncoder().fit_transform(y)
    # separate into train and test sets (fixed seed keeps the split repeatable)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=test_size,
                                                        random_state=1)
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
    # define the search
    search = StructuredDataClassifier(max_trials=max_trials)
    # perform the search
    search.fit(x=X_train, y=y_train, verbose=0)
    # evaluate the model on the held-out test set
    loss, acc = search.evaluate(X_test, y_test, verbose=0)
    print('Accuracy: %.3f' % acc)
    # use the model to make a prediction
    yhat = search.predict(X_new)
    # NOTE(review): '%.3f' assumes yhat[0] is numeric and convertible to a
    # single float - confirm against the predict() output for your labels.
    print('Predicted: %.3f' % yhat[0])
    # get the best performing model
    model = search.export_model()
    # summarize the exported model
    model.summary()
    # save the best performing model so it does not have to be retrained next time
    model.save(model_path)
Esempio n. 2
0
                model.summary(print_fn=lambda x: s.write(x + '\n'))
                model_summary = s.getvalue()
                s.close()

                print("The model summary is:\n\n{}".format(model_summary))
                st.info('**Model summary**')
                plt.text(0.1, 0.1, model_summary)
                plt.setp(plt.gca(), frame_on=False, xticks=(), yticks=())
                plt.grid(False)
                st.pyplot()

                # Training set
                st.info('**Model evaluation**')
                st.write('**Training Set**')
                # evaluate the model
                loss, acc = search.evaluate(X_train, y_train, verbose=0)
                st.write('Accuracy: %.3f' % acc)
                # precision tp / (tp + fp)
                precision = precision_score(y_train, y_pred_train)
                st.write('Precision: %f' % precision)
                # recall: tp / (tp + fn)
                recall = recall_score(y_train, y_pred_train)
                st.write('Sensitivity/Recall: %f' % recall)
                # f1: 2 tp / (2 tp + fp + fn)
                f1 = f1_score(y_train, y_pred_train)
                st.write('F1 score: %f' % f1)
                # ROC AUC
                auc = roc_auc_score(y_train, y_pred_train)
                st.write('ROC AUC: %f' % auc)
                # confusion matrix
                st.write("Confusion matrix")
Esempio n. 3
0
                                                    random_state=1)
# Sanity check: show the shapes of the four train/test arrays.
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

# Tell AutoKeras how to treat each input column.
column_types = {
    'Pclass': 'categorical',
    'Sex': 'categorical',
    'Age': 'numerical',
    'SibSp': 'categorical',
    'Parch': 'numerical',
    'Ticket': 'categorical',
    'Fare': 'numerical',
    'Cabin': 'categorical',
    'Embarked': 'categorical',
}

# Configure the architecture search, then run it on the training split.
search = StructuredDataClassifier(max_trials=100, column_types=column_types)
search.fit(x=X_train, y=y_train, verbose=0)

# Score the best candidate on the held-out test split.
loss, acc = search.evaluate(X_test, y_test, verbose=0)
print('Accuracy: %.3f' % acc)

# No prediction on new data is made in this script; that step was
# commented out in the original.

# Export the best model found, print its architecture and persist it.
model = search.export_model()
model.summary()
model.save('model_tt')
import pandas as pd #import pandas
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from autokeras import StructuredDataClassifier

# Load the breast-cancer dataset; the CSV must contain a 'class' column.
df = pd.read_csv('/content/drive/MyDrive/breast-cancer-wisconsin.data.txt')
# basic data preparation
# `columns=` replaces the positional axis argument (`df.drop([...], 1)`),
# which pandas removed in 2.0. `.to_numpy()` builds the arrays directly, so
# this script no longer relies on `np`, which it never imports.
X = df.drop(columns=['class']).to_numpy()  # input: every column except 'class'
# NOTE(review): the float32 cast below is disabled in the original;
# presumably some columns are not purely numeric - confirm before enabling.
#X = X.astype('float32')
y = df['class'].to_numpy()  # output
# integer encode the class labels
y = LabelEncoder().fit_transform(y)
# Look at the dataset again
print(f'Number of Rows: {df.shape[0]}')
print(f'Number of Columns: {df.shape[1]}')
print(df.head())

# Hold out 20% of the rows for testing (no fixed seed, so the split varies per run).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# Search over at most 5 candidate models.
search = StructuredDataClassifier(max_trials=5)
search.fit(x=X_train, y=y_train, verbose=1)
# Evaluate the best model on the held-out test set.
loss, acc = search.evaluate(X_test, y_test, verbose=0) #classification
print('Accuracy: %.3f' % acc)
print('loss: %.3f' % loss)
y_predictions = search.predict(X_test)
# Export the best model found, print its architecture and save it.
model = search.export_model()
model.summary()
print("\n%s: %.2f%%" % (model.metrics_names[1], acc*100))
model.save('breast_cancer_model.tf')

# Render the exported model's layer graph to an image file.
from keras.utils.vis_utils import plot_model
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)