# Split the vectorised inputs and their encoded labels into train/test parts
# (default split ratio of train_test_split).
x_train, x_test, y_train, y_test = train_test_split(x_vectors, encoded)
print('x train shape')
print(x_train.shape)
print('y train shape')
print(y_train.shape)

# Building model
# x_train is indexed as (samples, shape[1], shape[2]); the LSTM width is tied
# to the last axis of the input and the output layer to the label width.
model = Sequential()
model.add(
    Bidirectional(LSTM(x_train.shape[2]),
                  input_shape=(x_train.shape[1], x_train.shape[2])))
model.add(Dense(y_train.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.fit(x_train, y_train, epochs=70, batch_size=1, verbose=2)

# Keras models are persisted with the `save()` method; `save_model` only
# exists as a module-level function, so calling it on the model raised
# AttributeError after training had already finished.
model.save('my_model.h5')
#model=load_model('my_model.h5')
# predictions=model.predict(x_test)
# print(predictions)
# Final layers of the support network: PReLU + dropout, then a 1024 -> 48
# softmax classifier.
support_model.add(PReLU(1024))
support_model.add(Dropout(0.4))
support_model.add(Dense(1024, 48, activation='softmax'))

trainer = Adadelta(lr=4.0, rho=0.97, epsilon=1e-8)
support_model.compile(loss='categorical_crossentropy', optimizer=trainer)

# Train one epoch at a time on split 0 and evaluate on split 1 after each
# epoch, so Ctrl-C stops training cleanly between/within epochs.
try:
    for i in range(epoch):
        support_model.fit(new_X_cv[0],
                          Y_cv[0],
                          batch_size=256,
                          nb_epoch=1,
                          shuffle=True,
                          validation_split=0.0,
                          show_accuracy=False)
        support_model.evaluate(new_X_cv[1], Y_cv[1], show_accuracy=True)
except KeyboardInterrupt:
    print('Stop')

# Disabled XGBoost experiment, kept for reference as a string literal. The
# literal is explicitly closed below: left open it swallowed every line that
# followed it in the file.
"""
xg_train = xgb.DMatrix( new_X_cv[0], label=[y.index(1) for y in Y_cv[0].tolist()])
xg_cv = xgb.DMatrix(new_X_cv[1] , label=[y.index(1) for y in Y_cv[1].tolist()])
param = {}
# use softmax multi-class classification
param['objective'] = 'multi:softmax'
# scale weight of positive examples
param['eta'] = 0.15
param['max_depth'] = 5
param['silent'] = 1
param['nthread'] = 6
param['num_class'] = 48
param['subsample'] = 0.7
num_round = 20
"""
def stacking(clf, train_x, train_y, test_x, clf_name, scale=None):
    """Build out-of-fold and test-set predictions for one first-layer model.

    For every ``(train_index, test_index)`` pair produced by the free name
    ``kf`` the model is fitted on the training part of the fold, then used to
    predict the held-out part (written into the out-of-fold column) and the
    whole of ``test_x`` (averaged over all folds at the end). After each fold
    the running MAE scores are printed and appended to ``data/score.txt`` and
    the fold's model is written under ``data/model/``.

    Relies on ``kf`` and ``folds`` being defined outside this function.

    Args:
        clf: Estimator exposing ``fit``/``predict`` for the scikit-learn style
            names. For "xgb", "lgb" and "nn" the argument is ignored and
            rebound to a model trained inside the fold.
        train_x: Training features, indexed positionally with fold indices.
        train_y: Training targets, indexed the same way as ``train_x``.
        test_x: Features predicted by every fold's model.
        clf_name: One of "rf", "ada", "gb", "et", "lr", "lsvc", "knn",
            "xgb", "lgb" or "nn"; selects the training path and file format.
        scale: Object pickled next to each "nn" fold model; unused otherwise.

    Returns:
        Tuple ``(train, test)`` of column vectors: out-of-fold predictions of
        shape ``(train_x.shape[0], 1)`` and fold-averaged test predictions of
        shape ``(test_x.shape[0], 1)``.

    Raises:
        IOError: If ``clf_name`` is not one of the supported names.
    """
    train = np.zeros((train_x.shape[0], 1))
    test = np.zeros((test_x.shape[0], 1))
    test_pre = np.empty((folds, test_x.shape[0], 1))
    cv_scores = []

    for i, (train_index, test_index) in enumerate(kf):
        tr_x = train_x[train_index]
        tr_y = train_y[train_index]
        te_x = train_x[test_index]
        te_y = train_y[test_index]

        if clf_name in ["rf", "ada", "gb", "et", "lr", "lsvc", "knn"]:
            clf.fit(tr_x, tr_y)
            pre = clf.predict(te_x).reshape(-1, 1)
            train[test_index] = pre
            test_pre[i, :] = clf.predict(test_x).reshape(-1, 1)
            cv_scores.append(mean_absolute_error(te_y, pre))
        elif clf_name in ["xgb"]:
            train_matrix = xgb.DMatrix(tr_x, label=tr_y)
            test_matrix = xgb.DMatrix(te_x, label=te_y)
            z = xgb.DMatrix(test_x)
            params = {
                'eta': 0.015,
                'objective': 'reg:linear',
                'eval_metric': 'mae',
                'min_child_weight': 1.5,
                'colsample_bytree': 0.2,
                'max_depth': 7,
                'lambda': 0.3,
                'alpha': 0.6,
                'silent': 1
            }
            num_round = 10000
            early_stopping_rounds = 100
            watchlist = [(train_matrix, 'train'), (test_matrix, 'eval')]
            # NOTE(review): truthiness guard on a freshly built DMatrix looks
            # always-true -- confirm before removing.
            if test_matrix:
                clf = xgb.train(params,
                                train_matrix,
                                num_boost_round=num_round,
                                evals=watchlist,
                                early_stopping_rounds=early_stopping_rounds)
                pre = clf.predict(
                    test_matrix,
                    ntree_limit=clf.best_ntree_limit).reshape(-1, 1)
                train[test_index] = pre
                test_pre[i, :] = clf.predict(
                    z, ntree_limit=clf.best_ntree_limit).reshape(-1, 1)
                cv_scores.append(mean_absolute_error(te_y, pre))
        elif clf_name in ["lgb"]:
            train_matrix = lgb.Dataset(tr_x, label=tr_y)
            test_matrix = lgb.Dataset(te_x, label=te_y)
            # z = clf.Dataset(test_x, label=te_y)
            # z=test_x
            params = {
                'max_bin': 10,
                'learning_rate': 0.0021,
                'boosting_type': 'gbdt',
                'objective': 'regression',
                'metric': 'l1',
                'sub_feature': 0.345,
                'bagging_fraction': 0.85,
                'bagging_freq': 40,
                'num_leaves': 512,
                'min_data': 500,
                'min_hessian': 0.05,
                'verbose': 0,
                'feature_fraction_seed': 2,
                'bagging_seed': 3
            }
            num_round = 10000
            early_stopping_rounds = 100
            # NOTE(review): same always-true looking guard as the xgb path.
            if test_matrix:
                clf = lgb.train(params,
                                train_matrix,
                                num_round,
                                valid_sets=test_matrix,
                                early_stopping_rounds=early_stopping_rounds)
                pre = clf.predict(
                    te_x, num_iteration=clf.best_iteration).reshape(-1, 1)
                train[test_index] = pre
                test_pre[i, :] = clf.predict(
                    test_x, num_iteration=clf.best_iteration).reshape(-1, 1)
                cv_scores.append(mean_absolute_error(te_y, pre))
        elif clf_name in ["nn"]:
            # Imported lazily so Keras is only required for the "nn" path.
            from keras.layers import Dense
            from keras.callbacks import EarlyStopping, ReduceLROnPlateau
            from keras.regularizers import l2
            from keras.models import Sequential
            clf = Sequential()
            clf.add(
                Dense(64,
                      input_dim=tr_x.shape[1],
                      activation="relu",
                      W_regularizer=l2()))
            # model.add(Dropout(0.2))
            clf.add(Dense(64, activation="relu", W_regularizer=l2()))
            # model.add(Dropout(0.2))
            clf.add(Dense(1))
            clf.summary()
            early_stopping = EarlyStopping(monitor='val_loss', patience=20)
            # Named reduce_lr so the builtin `reduce` is not shadowed.
            reduce_lr = ReduceLROnPlateau(min_lr=0.0002, factor=0.05)
            clf.compile(optimizer="rmsprop", loss="mae")
            clf.fit(tr_x,
                    tr_y,
                    batch_size=2560,
                    nb_epoch=5000,
                    validation_data=[te_x, te_y],
                    callbacks=[early_stopping, reduce_lr])
            pre = clf.predict(te_x).reshape(-1, 1)
            train[test_index] = pre
            test_pre[i, :] = clf.predict(test_x).reshape(-1, 1)
            cv_scores.append(mean_absolute_error(te_y, pre))
        else:
            raise IOError("Please add new clf.")

        # Single-argument print() emits the same text on Python 2 and 3.
        print("%s now score is: %s" % (clf_name, cv_scores))
        with open("data/score.txt", "a") as f:
            f.write("%s now score is:" % clf_name + str(cv_scores) + "\n")

        # Persist this fold's model in the format native to its library.
        fold_path = 'data/model/' + clf_name + '_layer_1_fold_' + str(i)
        if clf_name in ["nn"]:
            clf.save(fold_path + '.h5')
            # `with` closes the handle; the bare open() call leaked it.
            with open(fold_path + '_scale.pkl', 'wb') as scale_file:
                pickle.dump(scale, scale_file)
        elif clf_name in ["xgb"]:
            clf.save_model(fold_path + '.model')
            # Stored separately so prediction can reuse the early-stopped
            # tree count.
            pd.Series(clf.best_ntree_limit).to_csv(
                fold_path + '_best_ntree_limit.conf')
        elif clf_name in ["lgb"]:
            clf.save_model(fold_path + '.model',
                           num_iteration=clf.best_iteration)
        else:
            with open(fold_path + '.pkl', 'wb') as model_file:
                pickle.dump(clf, model_file)

    # Average the per-fold predictions for the test set.
    test[:] = test_pre.mean(axis=0)
    print("%s_score_list: %s" % (clf_name, cv_scores))
    print("%s_score_mean: %s" % (clf_name, np.mean(cv_scores)))
    with open("data/score.txt", "a") as f:
        f.write("%s_score_mean:" % clf_name + str(np.mean(cv_scores)) + "\n")
    return train.reshape(-1, 1), test.reshape(-1, 1)
def train():
    """Train the CNN on the image folders and save it to ``model1.h5``.

    Images are read from ``data/train`` and ``data/test`` as 48x48 grayscale
    batches with categorical labels, rescaled by 1/255. The network ends in a
    7-way softmax and is trained with categorical cross-entropy. The training
    history is passed to ``plot_model_history`` before the model is saved.

    Returns:
        None. The trained model is written to ``model1.h5``.
    """
    train_dir = 'data/train'
    val_dir = 'data/test'

    # Hard-coded sample counts; only used to derive the steps per epoch.
    num_train = 28709
    num_val = 7178
    batch_size = 64
    num_epoch = 50

    train_datagen = ImageDataGenerator(rescale=1. / 255)
    val_datagen = ImageDataGenerator(rescale=1. / 255)

    train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=(48, 48),
        batch_size=batch_size,
        color_mode="grayscale",
        class_mode='categorical')

    validation_generator = val_datagen.flow_from_directory(
        val_dir,
        target_size=(48, 48),
        batch_size=batch_size,
        color_mode="grayscale",
        class_mode='categorical')

    # Create the model
    model = Sequential()

    model.add(
        Conv2D(32,
               kernel_size=(3, 3),
               activation='relu',
               input_shape=(48, 48, 1)))
    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(1024, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(7, activation='softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(lr=0.0001, decay=1e-6),
                  metrics=['accuracy'])

    model_info = model.fit_generator(train_generator,
                                     steps_per_epoch=num_train // batch_size,
                                     epochs=num_epoch,
                                     validation_data=validation_generator,
                                     validation_steps=num_val // batch_size)

    plot_model_history(model_info)

    # Keras models are persisted with `save()`; `save_model` is a module-level
    # function, so calling it on the model raised AttributeError only after
    # the whole training run had completed.
    model.save('model1.h5')
plt.tight_layout()
# NOTE(review): a bare `plt` expression does nothing when run as a script;
# presumably `plt.show()` (or notebook cell output) was intended -- confirm.
plt

from sklearn.metrics import classification_report, confusion_matrix

# Run predictions and build the confusion matrix.
Y_pred = model.predict(X_test)
print("Y_pred:", Y_pred)
# Collapse the per-class scores to the index of the highest-scoring class.
y_pred = np.argmax(Y_pred, axis=1)
print("Y_pred:", y_pred)
'''
# (or)
y_pred = model.predict_classes(X_test)
print("Y_pred:",y_pred)
'''
p = model.predict_proba(X_test)  # to predict probability

target_names = ['class 0(Flowers)', 'class 1(Dogs)']
# Y_test is reduced with argmax the same way as the predictions so both
# sides are class indices.
print(
    classification_report(np.argmax(Y_test, axis=1),
                          y_pred,
                          target_names=target_names))
print(confusion_matrix(np.argmax(Y_test, axis=1), y_pred))

# Save the model.
# Keras models are persisted with `save()`; `save_model` is a module-level
# function, so calling it on the model raised AttributeError.
fname = "weights-Test-CNN.hdf5"
model.save(fname, overwrite=True)