def train_model(model_type: str, train: np.lib.npyio.NpzFile, test: dict,
                crime: str, data_type: str) -> pd.DataFrame:
    # test maps month keys to arrays indexed by 'x' and 'y'
    x, y = train['x'], train['y']
    # x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=test_size, shuffle=False, random_state=42)
    # scale_pos_weight is conventionally the ratio of negative to positive examples
    weight_ratio = float(len(y[y == 0])) / float(len(y[y == 1]))
    results = pd.DataFrame({'crime': [], 'model': [], 'data_type': [],
                            'month': [], 'f1': [], 'auc': []})
    if model_type == 'xgboost':
        model = XGBClassifier(n_estimators=200, objective='binary:logistic',
                              n_jobs=-1, scale_pos_weight=weight_ratio,
                              eval_metric='logloss')
        model.fit(x, y)
        for month in test:
            x_test, y_test = test[month]['x'], test[month]['y']
            proba = model.predict_proba(x_test)[:, 1]
            preds = model.predict(x_test)
            auc = round(roc_auc_score(y_test, proba), 4)
            f1 = round(f1_score(y_test, preds), 4)
            # DataFrame.append was removed in pandas 2.0; use pd.concat
            row = pd.DataFrame([{'crime': crime, 'model': model_type,
                                 'data_type': data_type, 'month': month,
                                 'f1': f1, 'auc': auc}])
            results = pd.concat([results, row], ignore_index=True)
    else:
        model = CrimeModel(32)
        model.compile(optimizer='adam', loss='binary_crossentropy',
                      metrics=[tf.keras.metrics.Precision(),
                               tf.keras.metrics.Recall(),
                               tf.keras.metrics.AUC()])
        model.fit(x, y, epochs=5, batch_size=64, verbose=0,
                  class_weight={0: 1, 1: weight_ratio})
        for month in test:
            x_test, y_test = test[month]['x'], test[month]['y']
            proba = model.predict(x_test, batch_size=64).ravel()
            preds = [1 if prob > 0.5 else 0 for prob in proba]
            auc = round(roc_auc_score(y_test, proba), 4)
            f1 = round(f1_score(y_test, preds), 4)
            row = pd.DataFrame([{'crime': crime, 'model': model_type,
                                 'data_type': data_type, 'month': month,
                                 'f1': f1, 'auc': auc}])
            results = pd.concat([results, row], ignore_index=True)
    return results
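# Usage sketch for train_model. The file paths, month keys, and the
# 'burglary'/'grid' arguments are illustrative assumptions; the real .npz
# layout is not shown above.
if __name__ == '__main__':
    train = np.load('data/train.npz')
    test = {m: np.load('data/test_{}.npz'.format(m)) for m in ['jan', 'feb', 'mar']}
    results = train_model('xgboost', train, test, crime='burglary', data_type='grid')
    print(results.sort_values('auc', ascending=False))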
class Classifier:
    def __init__(self, model_type, **kwargs):
        self.model_type = model_type
        if model_type == 'LR':  # Logistic Regression
            # modeling
            self.model = LogisticRegression(**kwargs)
        elif model_type == 'DT':  # Decision Tree
            # modeling
            self.model = DecisionTreeClassifier(**kwargs)
        elif model_type == 'RF':  # Random Forest
            # modeling
            self.model = RandomForestClassifier(**kwargs)
        elif model_type == 'XGB':  # XGBoost
            # default
            kwargs['objective'] = "binary:logistic"
            # modeling
            self.model = XGBClassifier(**kwargs)
        elif model_type == 'DNN':  # Deep Neural Network
            # required params; raise instead of silently storing an
            # exception object in the variable
            if 'nb_features' not in kwargs:
                raise NameError("name 'nb_features' is not defined")
            if 'nb_class' not in kwargs:
                raise NameError("name 'nb_class' is not defined")
            if 'nb_layers' not in kwargs:
                raise NameError("name 'nb_layers' is not defined")
            nb_features = kwargs['nb_features']
            nb_class = kwargs['nb_class']
            nb_layers = kwargs['nb_layers']
            loss = 'categorical_crossentropy' if nb_class > 2 else 'binary_crossentropy'
            act_func = 'softmax' if nb_class > 2 else 'sigmoid'
            # modeling
            input_ = tf.keras.layers.Input(shape=(nb_features, ))
            x = input_
            for i in range(len(nb_layers)):
                x = tf.keras.layers.Dense(nb_layers[i], activation='relu')(x)
            output = tf.keras.layers.Dense(nb_class, activation=act_func)(x)
            self.model = tf.keras.models.Model(input_, output)
            # compile
            self.model.compile(optimizer=kwargs['optimizer'], loss=loss, metrics=['acc'])

    def train(self, X, y, savedir=None, **kwargs):
        # set evaluation dataset when the selected model is XGB
        if self.model_type == 'XGB':
            kwargs['eval_set'] = [(X, y)]
        # model training
        self.model.fit(X, y, **kwargs)
        # save model
        if savedir is not None:
            # check save directory
            if not os.path.isdir('../saved_models'):
                os.mkdir('../saved_models')
            # DNN models use the Keras save format; everything else is pickled
            if self.model_type == 'DNN':
                self.model.save(savedir)
            else:
                pickle.dump(self.model, open(savedir, "wb"))
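# Usage sketch for Classifier. The hyperparameters and the one-hot
# y_train_onehot variable are illustrative assumptions, not part of the
# original code.
clf = Classifier('XGB', n_estimators=100, max_depth=4)
clf.train(X_train, y_train, savedir='../saved_models/xgb.pkl', verbose=False)

# For the DNN branch, y must be encoded to match the nb_class output units
# (e.g. one-hot for nb_class > 2)
dnn = Classifier('DNN', nb_features=X_train.shape[1], nb_class=3,
                 nb_layers=[64, 32], optimizer='adam')
dnn.train(X_train, y_train_onehot, savedir='../saved_models/dnn.h5',
          epochs=10, batch_size=32)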
## Adding the third hidden layer
#classifier.add(Dense(units = Nh, kernel_initializer = 'uniform', activation = 'relu'))
##classifier.add(LeakyReLU(alpha=0.1))
#classifier.add(Dropout(0.01))

# Adding the output layer (no dropout after this layer: randomly zeroing the
# single sigmoid unit would drop the prediction itself)
classifier.add(
    Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))

# Define the optimizer
optimizer = RMSprop(learning_rate=0.001, rho=0.9, epsilon=1e-08)

# Compile the model
classifier.compile(optimizer=optimizer,
                   loss="binary_crossentropy",
                   metrics=["accuracy"])

# Set a learning rate annealer; monitor 'accuracy' to match the metric name
# used in compile()
learning_rate_reduction = ReduceLROnPlateau(monitor='accuracy',
                                            patience=3,
                                            verbose=1,
                                            factor=0.5,
                                            min_lr=0.00001)

# Fitting the ANN to the Training set
history = classifier.fit(X1, y,
                         batch_size=25,
                         epochs=10000,
                         callbacks=[learning_rate_reduction])
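# With epochs=10000 and only an LR annealer, training runs the full epoch
# budget. A hedged variant of the fit call above with EarlyStopping added
# (assumes the standard keras.callbacks import), so training can stop once
# accuracy plateaus:
from keras.callbacks import EarlyStopping
early_stop = EarlyStopping(monitor='accuracy', patience=10,
                           restore_best_weights=True)
history = classifier.fit(X1, y,
                         batch_size=25,
                         epochs=10000,
                         callbacks=[learning_rate_reduction, early_stop])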
# Neural Network Architecture
# Create the Sequential model container
model = Sequential()
# Add dense layers, noting the number of neurons in each added layer.
# The input dimension only needs to be given for the first layer: it is the
# number of features/columns.
model.add(Dense(input_dim=17, units=8, activation='relu', name='output_1'))
model.add(Dense(units=16, activation='relu', name='output_2'))
# Make sure the output layer has two neurons, one for each attrition class
model.add(Dense(units=2, activation='sigmoid'))

# Compile the Network
# More information on optimizer types:
# https://keras.io/optimizers/
model.compile(optimizer=Adam(learning_rate=0.01),
              loss='binary_crossentropy',
              metrics=['accuracy'])
# loss='binary_crossentropy' specifies that your model should optimize the log
# loss for binary classification.
# metrics=['accuracy'] specifies that accuracy should be printed out

# Review NN configuration
model.summary()

History = model.fit(X_train, y_train,
                    validation_data=(X_test, y_test),
                    epochs=10,
                    verbose=1)

# predict_classes was removed from Keras; take the argmax of the predicted
# probabilities to get class labels
y_pred = model.predict(X_test).argmax(axis=1)

# Log Loss over time
plt.plot(History.history['loss'])
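# Because validation_data is passed to fit(), the history also records
# 'val_loss'; a small sketch overlaying it on the training curve above:
plt.plot(History.history['val_loss'])
plt.xlabel('epoch')
plt.ylabel('binary cross-entropy')
plt.legend(['train', 'validation'])
plt.show()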
X_train_validate_nn = X_train_validate

model = K.Sequential()
model.add(
    K.layers.Dense(128, input_dim=X_train_nn.shape[1], activation='sigmoid'))
model.add(K.layers.Dense(64, activation='sigmoid'))
model.add(K.layers.Dense(32, activation='sigmoid'))
# Linear output unit for regression
model.add(K.layers.Dense(1))
model.summary()

model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mean_squared_error'])
model.fit(x=X_train_nn, y=y_train,
          epochs=15,
          verbose=1,
          shuffle=True)
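# X_train_validate_nn is prepared above but never used; a hedged sketch of
# scoring the fitted network on it. y_train_validate is an assumed name for
# the matching targets, which do not appear in the snippet.
val_loss, val_mse = model.evaluate(X_train_validate_nn, y_train_validate, verbose=0)
print('validation MSE:', val_mse)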
# Initialising the ANN
classifier = Sequential()

# Adding the input layer and the first hidden layer
classifier.add(Dense(units=6, kernel_initializer='uniform',
                     activation='relu', input_dim=8))

# Adding the second hidden layer
classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu'))

# Adding the third hidden layer
classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu'))

# Adding the output layer
classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))

# Compiling the ANN
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Fitting the ANN to the Training set
classifier.fit(X_train, y_train, batch_size=3, epochs=100)

# Part 3 - Making the predictions and evaluating the model

# Predicting the Test set results
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm_ann = confusion_matrix(y_test, y_pred)
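# A small follow-up sketch: reading accuracy off the confusion matrix
# (in scikit-learn, rows are true classes and columns are predictions,
# so ravel() on a 2x2 matrix yields tn, fp, fn, tp)
tn, fp, fn, tp = cm_ann.ravel()
accuracy_ann = (tp + tn) / (tp + tn + fp + fn)
print('ANN accuracy:', accuracy_ann)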
import numpy as np
from keras.callbacks import EarlyStopping
from keras.layers import Dense, Dropout
from keras.models import Sequential
from sklearn.metrics import accuracy_score

num_feats = len(x_train[0])

model = Sequential()
early_stop = EarlyStopping(monitor='loss', patience=0, verbose=1,
                           min_delta=0.005, mode='auto')
model.add(Dense(num_feats, activation='relu', input_dim=num_feats))
model.add(Dropout(0.50))
model.add(Dense(500, activation='relu', kernel_initializer='uniform'))
model.add(Dropout(0.50))
model.add(Dense(2, kernel_initializer='uniform', activation='softmax'))
model.compile(loss='sparse_categorical_crossentropy', metrics=['accuracy'],
              optimizer='adam')
model.fit(x_train, y_train, epochs=25, verbose=1, callbacks=[early_stop])

# predict_classes is gone from recent Keras; argmax over the softmax output
# gives the predicted class labels
y_pred = np.argmax(model.predict(x_test), axis=1)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100.0))
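# Accuracy alone can be misleading on imbalanced labels; a short sketch of a
# fuller per-class report on the same predictions:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))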
# Fragment of a model-selection chain; the preceding XGB and SVM branches are
# not part of this snippet
elif model_type == 'ANN':
    y_train = to_categorical(y_train, num_classes)
    y_test = to_categorical(y_test, num_classes)

    patience = 16
    early_stop = EarlyStopping(monitor='loss', patience=patience, verbose=1,
                               min_delta=0.005, mode='auto')
    model_save = ModelCheckpoint("best_model.hdf5", monitor='loss', verbose=0,
                                 save_best_only=True, save_weights_only=False,
                                 mode='auto', save_freq='epoch')
    reduce_LR = ReduceLROnPlateau(monitor='loss', factor=0.1,
                                  patience=patience // 2, verbose=0,
                                  min_delta=0.005, mode='auto', cooldown=0,
                                  min_lr=0)

    model = Sequential()
    model.add(Dense(num_feats, activation='relu', input_dim=num_feats))
    model.add(Dropout(0.5))
    model.add(Dense(int((num_feats + num_classes) / 2), activation='relu',
                    kernel_initializer='uniform'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, kernel_initializer='uniform',
                    activation='softmax'))
    # categorical_crossentropy matches the one-hot labels produced by
    # to_categorical and the softmax output layer
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'],
                  optimizer='adam')
    model.fit(x_train, y_train, epochs=100, verbose=1,
              callbacks=[early_stop, model_save, reduce_LR])
else:
    raise Exception('Unrecognized Model. Use XGB, SVM or ANN')

if model_type == 'ANN':
    results = ann_1d(model, x_test, y_test, 0)
    #OBOResults = ann_1d(model, x_test, y_test, 1)
else:
    results = xgb_tester(model, x_test, y_test, 0)
    #OBOResults = xgb_tester(model, x_test, y_test, 1)

#window_scores.append(OBOResults[0])
mcc_scores.append(results[1])
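# The checkpoint above keeps the lowest-loss weights in "best_model.hdf5"; a
# short sketch of reloading that snapshot before evaluation:
from keras.models import load_model
model = load_model("best_model.hdf5")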
import numpy as np

classifier = Sequential()

# Adding the input layer and the first hidden layer
classifier.add(Dense(activation="relu", input_dim=1000, units=300))
classifier.add(Dropout(rate=0.05))

# Adding the second hidden layer
classifier.add(Dense(activation="sigmoid", units=300))
classifier.add(Dropout(rate=0.05))

# Adding the output layer
classifier.add(Dense(activation="softmax", units=9))

# Compiling the ANN
classifier.compile(optimizer='adam', loss='sparse_categorical_crossentropy',
                   metrics=['accuracy'])

# Fitting the ANN to the Training set
classifier.fit(x_train, y_train, batch_size=300, epochs=500)
# alternative: refit on the full dataset once hyperparameters are settled
#classifier.fit(X, y, batch_size=300, epochs=100)

# Making the predictions and evaluating the model

# Predicting the Test set results
y_pred = classifier.predict(x_test)
# alternative: predict on the held-out full test split
#y_pred = classifier.predict(X_test)

# Evaluating the model
# turn probabilities into class predictions
y_pred = np.argmax(y_pred, axis=1)
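# Hedged evaluation sketch for the 9-class predictions; y_test is assumed to
# hold integer labels, matching the sparse_categorical_crossentropy loss
from sklearn.metrics import accuracy_score, confusion_matrix
print('accuracy:', accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))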
#score=0.52721
#%%
import keras
import numpy as np
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.optimizers import Adam
from keras.utils import to_categorical
from sklearn.metrics import f1_score
#%%
model = Sequential()
# Dense layers without an explicit activation are linear
model.add(Dense(200, input_shape=(x_train.shape[1], )))
model.add(Dense(200))
model.add(Dropout(0.2))
model.add(Dense(150))
model.add(Dense(50))
model.add(Dense(6, activation='softmax'))
#%%
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['acc'])
#%%
model.fit(x_train, y_train, epochs=1000)
#%%
# predict_classes is unavailable in recent Keras; use argmax instead
prediction = np.argmax(model.predict(x_test), axis=1)
#%%
y_test = [i.argmax() for i in y_test]
#%%
score = f1_score(y_test, prediction, average='weighted')
print(score)
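#%%
# The stacked linear Dense layers above collapse to a single affine map before
# the softmax; a hedged variant with relu hidden activations as a comparison
# point (same shapes, not the original author's architecture):
model_relu = Sequential()
model_relu.add(Dense(200, activation='relu', input_shape=(x_train.shape[1], )))
model_relu.add(Dropout(0.2))
model_relu.add(Dense(150, activation='relu'))
model_relu.add(Dense(50, activation='relu'))
model_relu.add(Dense(6, activation='softmax'))
model_relu.compile(optimizer='adam', loss='categorical_crossentropy',
                   metrics=['acc'])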
def train_non_swat_model(segment, tab, authUser, authPw, urlPrefix):
    try:
        df = segment['segment_tbl'].to_frame()
        df = df[df['_PartInd_'] == 1]
        df = df.select_dtypes(exclude=['object'])
        imputed_cols = [col for col in df.columns if "IMP" in col]
        df = df[imputed_cols]
        target = "IMP_" + tab.children[0].children[3].children[0].children[0].value
        imputed_cols.remove(target)
        X_train = df[imputed_cols]
        y_train = df[target]
        model = None
    except Exception:
        print("could not load segment data")
        segment['modelObj'] = None
        return None

    if segment['model'] == 'XGBoost':
        model = XGBClassifier(**segment['train_params'])
        model.fit(X_train, y_train)
    elif segment['model'] == 'TensorFlow':
        # map the UI activation choice ('RECTIFIER', 'TANH' or 'EXP') to the
        # corresponding Keras activation name
        if segment['train_params']['tf_acts'] == 'RECTIFIER':
            act = 'relu'
        elif segment['train_params']['tf_acts'] == 'TANH':
            act = 'tanh'
        else:
            act = 'exponential'
        hid_layers = list()
        hid_num = 4
        if segment['train_params']['tf_hidden'] != '':
            hid_num = int(segment['train_params']['tf_hidden'])
        for hidden in range(hid_num):
            hid_layers.append(tf.keras.layers.Dense(units=32, activation=act))
        model = tf.keras.models.Sequential(
            [tf.keras.layers.Dense(units=128, activation=act,
                                   input_shape=(X_train.shape[-1],))]
            + hid_layers
            + [tf.keras.layers.Dense(1, activation='sigmoid')])
        # use binary_crossentropy loss function for binary target
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
        # train a model
        #X_train = (X_train - X_train.mean()) / (X_train.max() - X_train.min())
        # oversample the minority class before fitting
        sm = SMOTE(random_state=10)
        X_train, y_train = sm.fit_resample(X_train.to_numpy(), y_train)
        model.fit(X_train, y_train, epochs=9, verbose=0)
    elif segment['model'] == 'AutoML':
        var_list = list()
        for col in list(segment['session'].CASTable(tab.children[0].children[1].value,
                                                    caslib='Public').columns):
            var_list.append({"name": col})
        projectName = ('MLPA_test_' + str(random.randint(1, 100))
                       + random.choice(string.ascii_letters))
        segment['session'].table.partition(
            table={'name': segment['segment_tbl'].name,
                   "vars": var_list,
                   "where": "_PartInd_ = 1"},
            casout={'name': projectName + "_Train",
                    'promote': True,
                    'caslib': 'CASUSER({})'.format(authUser)})

        # -----------------------------------------------
        # Get authentication token (credentials redacted)
        # -----------------------------------------------
        authCred = 'password&username='******'&password='******
        headers = {
            'Authorization': oauthToken,
            'Accept': "application/vnd.sas.analytics.ml.pipeline.automation.project+json",
            'Content-Type': "application/json"
        }
        payload = {
            'dataTableUri': '/dataTables/dataSources/cas~fs~cas-shared-default~fs~CASUSER({})/tables/'.format(authUser) + projectName + "_Train",
            'type': 'predictive',
            'name': projectName,
            'description': 'Project generated for test',
            'settings': {
                'autoRun': True,
                'modelingMode': 'Standard',
                'maxModelingTime': segment['train_params']['ntime']
            },
            'analyticsProjectAttributes': {
                'targetVariable': tab.children[0].children[3].children[0].children[0].value
            }
        }
        payload_data = json.dumps(payload, indent=4)

        # Create new MLPA project and run pipeline
        mlpaProject = executeRestCallWithPayload(urlPrefix, "POST", tokenUri,
                                                 payload_data, headers)

        # -----------------------------------------------
        # Poll every 5 seconds until MLPA project state is completed
        # (a sketch of this loop follows the function)
        # -----------------------------------------------
        projectStateLink = list(filter(lambda x: x["rel"] == "state",
                                       mlpaProject["links"]))[0]
        headers = {
            'Authorization': oauthToken,
            'Accept': projectStateLink["type"]
        }
        segment['mlpaProject'] = mlpaProject
        segment["projectStateLink"] = projectStateLink
        segment["oauthToken"] = oauthToken
        segment['projectName'] = projectName

    segment['modelObj'] = model
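# The comment inside the AutoML branch promises a 5-second poll until the
# project completes, but the loop itself is not in the snippet. A hedged
# sketch, assuming the stored state link has a 'uri' field relative to
# urlPrefix and that the endpoint returns a plain-text state string
# (requests is an assumed import):
import time
import requests

def wait_for_project(segment, urlPrefix, interval=5):
    stateUri = segment['projectStateLink']['uri']
    headers = {'Authorization': segment['oauthToken'],
               'Accept': segment['projectStateLink']['type']}
    while True:
        state = requests.get(urlPrefix + stateUri, headers=headers).text
        if state in ('completed', 'failed'):
            return state
        time.sleep(interval)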
print('1- Load previous model')
print('2- Train model')
option = input('Choose option: ')

if option == '2':
    # Create the model
    model = Sequential()

    # Add layers to the model
    model.add(Dense(64, input_dim=7, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    # Compile the model with an RMSprop optimizer and a learning rate of 0.0001
    model.compile(loss='binary_crossentropy',
                  optimizer=RMSprop(learning_rate=0.0001))

    checkpoint = ModelCheckpoint('keras_models/weights.hdf5',
                                 monitor='val_loss', verbose=0,
                                 save_best_only=True, mode='auto')

    # Train the model
    history = model.fit(x_train, y_train,
                        validation_data=(x_test, y_test),
                        batch_size=4, epochs=100,
                        callbacks=[checkpoint])
    # Restore the best weights saved by the checkpoint
    model.load_weights('keras_models/weights.hdf5')
    save_model(model)
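# The menu offers option '1' (load a previous model) but that branch is not in
# the snippet; a hedged sketch, assuming the model was previously saved in the
# Keras format to a hypothetical path:
else:
    from keras.models import load_model
    model = load_model('keras_models/model.hdf5')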