def main():
    """Load the rt-polarity sentiment data, train a Keras classifier,
    cross-validate it, and report test-set accuracy."""
    # files: list of input file names (negative-label file first, positive second)
    files = ["rt-polaritydata/rt-polarity.neg", "rt-polaritydata/rt-polarity.pos"]
    print('Loading data...')
    phrases, labels = load_data(files)
    print('Preprocessing data...')
    # sent_len=51 — presumably the fixed padded sentence length; TODO confirm
    data = data_to_embedding(phrases, sent_len=51)
    # Split into train / validation / test.
    # NOTE(review): the original comment claimed 60/20/20, but taking 20% of
    # the remaining 80% actually yields 64% train / 16% val / 20% test.
    x_first_split, x_test, y_first_split, y_test = train_test_split(data, labels, test_size=0.2)
    x_train, x_val, y_train, y_val = train_test_split(x_first_split, y_first_split, test_size=0.2)
    # --------------- simple way to make a model, train and test it ------------------
    print('Training the model...')
    model = KerasClassifier(build_fn=create_model, epochs=4, dropout=0.2, input_dim=5100, verbose=0)
    model.fit(x_train, y_train)
    # -------------- example cross validation -----------------------
    seed = 7
    kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
    # do cross validation on only the training and validation set (i.e. x_first_split)
    results = cross_val_score(model, x_first_split, y_first_split, cv=kfold)
    print("average result:{0} , std: {1}".format(results.mean(), results.std()))
    # -------------- finally, produce predictions on test set ------
    preds = model.predict(x_test)
    acc = accuracy_score(y_test, preds)
    print(acc * 100)
def stacking_NN(S_train, y_train, cv=5, epochs=20, deep=False):
    """Fit a small neural-network meta-model on stacked base-model outputs.

    Args:
        S_train: 2-D array of base-model predictions (one column per model).
        y_train: training labels (one-hot, given the softmax/categorical loss).
        cv: unused; kept for interface compatibility.
        epochs: number of training epochs for the meta-model.
        deep: when True use a two-hidden-layer meta-network, else one layer.

    Returns:
        The fitted KerasClassifier meta-model.
    """
    n_inputs = len(S_train[0])

    def _shallow_builder():
        # one hidden layer of 16 units
        net = Sequential()
        net.add(Dense(16, input_dim=n_inputs, activation='relu'))
        net.add(Dense(2, activation='softmax'))
        net.compile(loss='categorical_crossentropy', optimizer='adam',
                    metrics=['accuracy'])
        return net

    def _deep_builder():
        # two hidden layers of 8 units each
        net = Sequential()
        net.add(Dense(8, input_dim=n_inputs, activation='relu'))
        net.add(Dense(8, input_dim=8, activation='relu'))
        net.add(Dense(2, activation='softmax'))
        net.compile(loss='categorical_crossentropy', optimizer='adam',
                    metrics=['accuracy'])
        return net

    meta_model = KerasClassifier(build_fn=_deep_builder if deep else _shallow_builder)
    meta_model.fit(S_train, y_train, epochs=epochs)
    return meta_model
def param_tune(self):
    """
    Creates, fits, and predicts a model multiple times with every combination
    of hyperparameters, given below, in an attempt to fine-tune the model
    using more precise possibilities than the random tuning above.
    Writes the best (epochs, batch_size, init, optimizer) tuple and its
    accuracy to self.results.
    """
    best_params = ()
    best_acc = 0
    # Exhaustive grid over (epochs, batch_size, init, optimizer).
    for comb in list(
            product(
                [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000],  # epochs
                [1, 3, 5, 7, 9],  # batch_size
                ['glorot_uniform', 'normal', 'uniform'],  # init
                ['rmsprop', 'adam'])):  # optimizer
        auto = KerasClassifier(build_fn=self.model_build,
                               epochs=comb[0],
                               batch_size=comb[1],
                               init=comb[2],
                               optimizer=comb[3])
        auto.fit(self.X_train, self.y_train)
        predictions = auto.predict(self.X_test)
        predictions = np_utils.to_categorical(predictions)
        # NOTE(review): this compares one-hot matrices element-wise, so it
        # counts per-class agreements rather than per-sample accuracy
        # (a wrong prediction still matches on most columns) — confirm
        # this metric is intended.
        accu_test = np.sum(self.y_test == predictions) / self.y_test.size
        if accu_test > best_acc:
            best_params = comb
            best_acc = accu_test
    self.results.write("Param Tune Results\n")
    self.results.write(str(best_params) + "\n")
    self.results.write(str(best_acc) + "\n")
def random_param_tune(self):
    """
    Creates, fits, and predicts a model multiple times with random
    combinations of hyperparameters, given below, in an attempt to find the
    best set of hyperparameters from a wide range of possibilities.
    At most 250 randomly sampled combinations are evaluated; the best
    tuple and accuracy are written to self.results.
    """
    best_params = ()
    best_acc = 0
    all_comb = list(
        product(
            [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000],  # epochs
            [1, 3, 5, 7, 9],  # batch_size
            ['glorot_uniform', 'normal', 'uniform'],  # init
            ['rmsprop', 'adam']))  # optimizer
    # Cap the search at 250 random combinations to bound runtime.
    if len(all_comb) > 250:
        all_comb = sample(all_comb, 250)
    for comb in all_comb:
        auto = KerasClassifier(build_fn=self.model_build,
                               epochs=comb[0],
                               batch_size=comb[1],
                               init=comb[2],
                               optimizer=comb[3])
        auto.fit(self.X_train, self.y_train)
        predictions = auto.predict(self.X_test)
        predictions = np_utils.to_categorical(predictions)
        # NOTE(review): element-wise one-hot comparison counts per-class
        # agreements, not per-sample accuracy — confirm intended (same
        # metric as param_tune above).
        accu_test = np.sum(self.y_test == predictions) / self.y_test.size
        if accu_test > best_acc:
            best_params = comb
            best_acc = accu_test
    self.results.write("Random Param Tune Results\n")
    self.results.write(str(best_params) + "\n")
    self.results.write(str(best_acc) + "\n")
def train(X, y, alg, scaler, pca, features, seed=7):
    """Train a new model using the training data.

    Args:
        X: training feature matrix.
        y: training labels.
        alg: algorithm name; only "deep" is currently supported.
        scaler: optional fitted scaler applied to X (or None).
        pca: optional fitted PCA transform applied to X (or None).
        features: unused; kept for interface compatibility.
        seed: RNG seed for reproducibility.

    Returns:
        The fitted KerasClassifier.
    """
    np.random.seed(seed)
    if scaler is not None:
        X = scaler.transform(X)
    if pca is not None:
        X = pca.transform(X)
    if alg == "deep":
        # nb_epoch / n_folds are legacy Keras-1 / old-sklearn argument names,
        # kept intentionally for the versions this code targets.
        model = KerasClassifier(build_fn=create_deepmodel, nb_epoch=2,
                                batch_size=2, verbose=1)
    else:
        # Fix: this was a Python 2 `print` statement while the rest of the
        # block used call syntax; the function form works on both 2 and 3.
        print('No model defined for ' + alg)
        exit()
    # Cross-validate on the full data set before the final fit.
    kfold = StratifiedKFold(y=y, n_folds=3, shuffle=True, random_state=seed)
    results = cross_val_score(model, X, y, cv=kfold)
    print(results.mean())
    model.fit(X, y, verbose=1, batch_size=1)
    return model
class Keras(BaseEstimator):
    """sklearn-compatible wrapper that builds a Keras model lazily at fit time,
    once the input dimensionality and number of classes are known.

    Args:
        build_function: callable(n_features, n_classes) returning a compiled model.
        multi_class: when True, n_classes_ is inferred from the labels;
            otherwise a single output unit is assumed.
        keras_params: extra kwargs forwarded to KerasClassifier.
    """

    def __init__(self, build_function, multi_class=False, keras_params=None):
        if not callable(build_function):
            raise ValueError('Model construction function must be callable.')
        self.multi_class = multi_class
        self.build_function = build_function
        if keras_params is None:
            keras_params = {}
        self.keras_params = keras_params

    def fit(self, X, y):
        """Build the underlying classifier from the data shape and fit it."""
        if self.multi_class:
            self.n_classes_ = len(set(y))
        else:
            self.n_classes_ = 1
        # Defer model construction so build_function sees the real input size.
        build_callable = lambda: self.build_function(X.shape[1], self.n_classes_)
        keras_params = copy(self.keras_params)
        keras_params['build_fn'] = build_callable
        self.classifier_ = KerasClassifier(**keras_params)
        self.classifier_.fit(X, y)
        # Fix: sklearn's estimator contract requires fit to return self so the
        # wrapper composes with Pipeline / cross_val_score etc.
        return self

    def predict(self, X):
        """Predict labels for X using the fitted inner classifier."""
        return self.classifier_.predict(X)
class Keras(BaseEstimator):
    """sklearn-compatible wrapper that builds a Keras model lazily at fit time,
    once the input dimensionality and number of classes are known.

    Args:
        build_function: callable(n_features, n_classes) returning a compiled model.
        multi_class: when True, n_classes_ is inferred from the labels;
            otherwise a single output unit is assumed.
        keras_params: extra kwargs forwarded to KerasClassifier.
    """

    def __init__(self, build_function, multi_class=False, keras_params=None):
        if not callable(build_function):
            raise ValueError('Model construction function must be callable.')
        self.multi_class = multi_class
        self.build_function = build_function
        if keras_params is None:
            keras_params = {}
        self.keras_params = keras_params

    def fit(self, X, y):
        """Build the underlying classifier from the data shape and fit it."""
        if self.multi_class:
            self.n_classes_ = len(set(y))
        else:
            self.n_classes_ = 1
        # Defer model construction so build_function sees the real input size.
        build_callable = lambda: self.build_function(X.shape[1], self.n_classes_)
        keras_params = copy(self.keras_params)
        keras_params['build_fn'] = build_callable
        self.classifier_ = KerasClassifier(**keras_params)
        self.classifier_.fit(X, y)
        # Fix: sklearn's estimator contract requires fit to return self so the
        # wrapper composes with Pipeline / cross_val_score etc.
        return self

    def predict(self, X):
        """Predict labels for X using the fitted inner classifier."""
        return self.classifier_.predict(X)
class neural_estimator():
    """Thin wrapper around a small feed-forward Keras network: two hidden
    layers of `size` units with batch-norm, softmax output sized from the
    one-hot labels."""

    def __init__(self, x, y, size=64):
        # x: feature matrix; y: integer labels, one-hot encoded here
        self.x = x
        self.y = np_utils.to_categorical(y)
        # size: width of each hidden layer
        self.size = size
        self.model = KerasClassifier(build_fn=self.create_model,
                                     epochs=50,
                                     batch_size=512,
                                     verbose=0)
        #self.create_model_()

    def create_model(self):
        """Build and compile the network (called lazily by KerasClassifier)."""
        model = keras.Sequential([
            keras.layers.Flatten(input_shape=(self.x.shape[1], )),
            keras.layers.BatchNormalization(),
            keras.layers.Dense(self.size, activation=tf.nn.relu),
            keras.layers.BatchNormalization(),
            keras.layers.Dense(self.size, activation=tf.nn.relu),
            keras.layers.BatchNormalization(),
            # output width follows the one-hot label matrix
            keras.layers.Dense(self.y.shape[1], activation=tf.nn.softmax)
        ])
        model.compile(tf.keras.optimizers.Adam(),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        return model

    def fit(self):
        """Fit on the data given at construction; returns the wrapped estimator."""
        self.model.fit(self.x, self.y)
        return self.model
def add_candidate_feat(self, X_train, X_test, y_train, y_test, constructor_kwargs, scorer=mcc):
    """ Build, fit, and score a model using a subset of input features plus one candidate features
        @params:
            X_train            - Required : Pandas dataframe containing training set input data (Dataframe)
            X_test             - Required : Pandas dataframe containing test set input data (Dataframe)
            y_train            - Required : Pandas dataframe containing training set labels (Dataframe)
            y_test             - Required : Pandas dataframe containing test set labels (Dataframe)
            constructor_kwargs - Required : kwargs parameterizing for the model constructor function, except for n_features
            scorer             - Optional : Metric which accepts true and predicted labels as inputs; used to score model
        @returns: (score, confusion_matrix) for the fitted model on the test set
    """
    # Create compatibility-wrapped model with dim(X_train) input features, then fit and score it
    model = KerasClassifier(build_fn=construct_network,
                            n_features=len(X_train.columns.values),
                            **constructor_kwargs)
    model.fit(X_train, y_train)
    # Fix: predict once and reuse — the original called model.predict(X_test)
    # twice, doubling inference cost and risking an inconsistent score vs.
    # confusion matrix for stochastic predictors.
    y_pred = model.predict(X_test)
    score = scorer(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    return score, cm
def automl_basic(X_train, X_test, y_train, y_test, baseline, min_neurons,
                 max_neurons, max_layers, num_runs=3):
    """Grid-search network depth and width for a baseline model builder.

    For every (hidden-layer count, neuron count) pair, trains `num_runs`
    fresh classifiers and records their test-set accuracies.

    Returns:
        dict mapping (layers, neurons) -> list of accuracy scores.
    """
    accuracy_scores = defaultdict(list)
    for layers, neurons in itertools.product(range(max_layers),
                                             range(min_neurons, max_neurons)):
        print("Number of hidden layers", layers)
        for _ in range(num_runs):
            # bind the architecture parameters into the builder callable
            builder = partial(baseline, neurons, layers)
            clf = KerasClassifier(build_fn=builder, epochs=100, batch_size=5,
                                  verbose=0)
            clf.fit(X_train, y_train)
            predicted = clf.predict(X_test)
            accuracy_scores[(layers, neurons)].append(
                metrics.accuracy_score(y_test, predicted))
    return accuracy_scores
def start_fit(dataSet):
    """Shuffle, split, scale, and fit a DBN-style Keras classifier; print metrics.

    Args:
        dataSet: 2-D array whose first 148 columns are features and column 148
            is the label.
    """
    # Fix: the original shuffled into `data` but then sliced the unshuffled
    # dataSet, so the shuffle had no effect. Slice the shuffled copy.
    index = [i for i in range(len(dataSet))]
    random.shuffle(index)
    data = dataSet[index]
    X = data[:, 0:148]
    Y = data[:, 148]
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2,
                                                        random_state=0)
    # normalization (fit on train only, then apply to both splits)
    scaler = StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)
    dbn_model = KerasClassifier(model_init, epochs=500, batch_size=64, verbose=0)
    dbn_model.fit(X_train, y_train)
    y_ped = dbn_model.predict(X_test)
    acc, precision, npv, sensitivity, specificity, mcc, f1 = calculate_performace(
        len(y_ped), y_ped, y_test)
    # Fix: the original format args referenced an undefined `roc_auc`
    # (NameError at runtime); report the f1 actually computed above.
    print(
        'DBN:acc=%f,precision=%f,npv=%f,sensitivity=%f,specificity=%f,mcc=%f,f1=%f'
        % (acc, precision, npv, sensitivity, specificity, mcc, f1))
def train(self, features, answers):
    """Train the MLP classifier and print a small results table.

    Args:
        features: array of input samples.
        answers: array of integer labels (one-hot encoded below).

    Returns:
        The fitted KerasClassifier.
    """
    print("1.Training")
    # results table: header row then one row per run
    mlpPerf = [['Epoch', 'Batch Size', 'Accuracy']]
    model = KerasClassifier(build_fn=self.create_model, verbose=0)
    # One-hot encode the labels for the categorical output layer.
    answers = np_utils.to_categorical(answers)
    # Fit data to algo
    model.fit(features, answers)
    # Record (epoch, batch size, training accuracy %). NOTE(review): this is
    # accuracy on the training data itself, not a held-out set.
    mlpPerf.append([
        self.epoch, self.batch_size,
        "{0:.2f}".format(model.score(features, answers) * 100)
    ])
    self.precision.append(model.score(features, answers))
    # NOTE(review): always takes the first recorded score — confirm this is
    # really meant to be the "best" score rather than max(self.precision).
    self.best_score = self.precision[0]
    # Print table
    print(Tabulate(mlpPerf, headers='firstrow'))
    print()
    return model
def explain_row_eli5(): global map_values_eli5 # compute explanations only once if bool(map_values_eli5): return map_values_eli5 copy_model = tf.keras.models.load_model('{}/{}.h5'.format(name, name), custom_objects={"f1": kr.f1}) def base_model(): return copy_model my_model = KerasClassifier(build_fn=base_model) my_model.fit(X_test.copy(), y_test.copy()) perm = PermutationImportance(my_model).fit(X_test.copy(), y_test.copy()) # eli5.show_weights(perm, feature_names=list(df.drop('loan_repaid', axis=1).columns)) s = perm.feature_importances_ sorted_indices = sorted(range(len(s)), key=lambda k: s[k], reverse=True) class_1 = [(a, s[a]) for a in sorted_indices if s[a] > 0] sorted_indices = sorted(range(len(s)), key=lambda k: s[k]) class_0 = [(a, s[a] * -1) for a in sorted_indices if s[a] <= 0] for class_value in range(max(y_test)): map_values_eli5[class_value] = class_1 return map_values_eli5
def fit(self, model, c_m):
    """Cross-validate and fit a Keras model built by c_m; return predictions
    on the training data.

    Args:
        model: NOTE(review): ignored — it is immediately overwritten below;
            confirm whether the parameter should be removed or used.
        c_m: model-building callable passed to KerasClassifier as build_fn.

    Returns:
        Predictions of the fitted model on self.X.
    """
    #class_weight = CW.compute_class_weight('balanced', np.unique(self.y[0]), self.y[0])
    model = KerasClassifier(build_fn=c_m, epochs=200, batch_size=10, verbose=0)
    #class_weight = class_weight,
    # (Leftover notes from an abandoned grid search over optimizer /
    # batch_size / epochs / learn_rate / activation / momentum / init_mode.)
    kfold = KFold(n_splits=10, shuffle=True, random_state=1234)
    # NOTE(review): `neurons` is unused — presumably intended for the
    # commented-out GridSearchCV below; confirm before deleting.
    neurons = [1, 5, 10, 15, 20, 25, 30]
    #param_grid = dict(neurons=neurons)
    #grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
    #grid_result = grid.fit(self.X, self.y)
    # 10-fold CV score, then a final fit on all data.
    results = cross_val_score(model, self.X, self.y, cv=kfold)
    model.fit(self.X, self.y)
    y_pred = model.predict(self.X, batch_size=128, verbose=1)
    print("Baseline: %.2f%% (%.2f%%)" %
          (results.mean() * 100, results.std() * 100))
    print(y_pred)
    return y_pred
def train(self, filepath, target, feature_columns, seed):
    """Load data, fit a Keras baseline classifier, persist it, and run k-fold CV.

    Args:
        filepath: path of the input data file.
        target: name of the label column in the data.
        feature_columns: list of feature column names to keep.
        seed: RNG seed for reproducibility.
    """
    data = get_data_from_file(filepath)
    target_data = data[target]
    # getting the relevant features.
    data = data[feature_columns]
    train, test = split(data, 0.8)
    train_target, test_target = split(target_data, 0.8)
    # Set the seed for random function
    np.random.seed(seed)
    # Get one hot encoded data_set.
    one_hot_encoded_target = get_one_hot_encoded_target_columns(train_target)
    # NOTE(review): a and b are currently unused — presumably meant to
    # parameterize baseline_model; confirm before removing.
    a, b = get_num_unique_targets(train_target), get_num_feature(train)
    estimator = KerasClassifier(build_fn=baseline_model, nb_epoch=200,
                                batch_size=5, verbose=0)
    # Fix: the original fit on `target`, which is the label *column name*
    # string, not the labels. Fit on the actual training labels.
    estimator.fit(train, train_target)
    pickle.dump(estimator, open(MODEL_SAVE_FILE, 'wb'))
    # Fresh, unfitted estimator for cross-validation.
    estimator = KerasClassifier(build_fn=baseline_model, nb_epoch=200,
                                batch_size=5, verbose=0)
    # Do K fold cross Validation (fix: same target correction as above).
    kcross_validation(estimator, train, train_target, seed)
def DeepLearningClassifer_(self):
    """Run deep-learning classification; returns a fitted ensemble estimator."""
    from keras.callbacks import EarlyStopping
    from keras.wrappers.scikit_learn import KerasClassifier
    import tensorflow as tf
    # Configure GPU memory usage (allow up to 100% of GPU memory).
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 1.0
    session = tf.Session(config=config)
    estimator = None
    # Build the estimator according to the grid-search analysis flag.
    if True == self.do_analysis_gridsearch:
        estimator = KerasClassifier(build_fn=self._make_cls_deepleaning_model)
        estimator = self.make_grid_search_estimator(estimator, scoring='accuracy')
    else:
        # Use the first configured value for each hyperparameter.
        estimator = KerasClassifier(build_fn=self._make_cls_deepleaning_model,
                                    n_hidden=self.params[PARAM_NHIDDEN][0],
                                    n_unit=self.params[PARAM_NUNIT][0],
                                    keep_drop=self.params[PARAM_KEEPDROP][0])
    estimator.fit(np.array(self.X_train), np.array(self.y_train),
                  batch_size=self.params[PARAM_BATCHSIZE][0],
                  # effectively unbounded epochs; EarlyStopping ends training
                  epochs=100000,
                  shuffle=False,
                  # NOTE(review): the test split is used as validation data
                  # here, so early stopping peeks at the test set — confirm.
                  validation_data=(np.array(self.X_test), np.array(self.y_test)),
                  callbacks=[EarlyStopping(patience=3)])
    # Wrap in a bagging / AdaBoost ensemble estimator.
    estimator = self.make_bagada_cls_estimator(estimator)
    return estimator
def fit_neural_network(y_train=None, X_train=None, data=None, layers=None,
                       n_epochs=100, n_batch_size=20):
    """Fit a neural network classifier.

    (The original docstring said "regressor", but this wraps KerasClassifier
    and tags the estimator type as "classifier".)

    Args:
        y_train: training labels; ignored when *data* is given.
        X_train: training features; ignored when *data* is given.
        data: optional dict with keys "y_train" and "X_train"; takes
            precedence over the individual arguments.
        layers (str): str specifying the number of hidden layers and hidden
            nodes in the neural network. Example: "100-100-100".
        n_epochs (int): Number of epochs used for model fitting.
        n_batch_size (int): Batch size used for model fitting.

    Returns:
        nnet: The fitted neural network classifier.
    """
    if data is not None:
        y_train = data["y_train"]
        X_train = data["X_train"]
    build_model = _get_build_model_func(input_dim=X_train.shape[1], layers=layers)
    nnet = KerasClassifier(build_fn=build_model, batch_size=n_batch_size,
                           epochs=n_epochs)
    # Mark the estimator type explicitly so sklearn utilities (e.g. scorers)
    # treat the wrapper as a classifier.
    nnet._estimator_type = "classifier"
    nnet.fit(X_train, y_train, verbose=False)
    return nnet
def cross_eval_dnn(dataset_name, outfolder, model_descriptor: str, cpus, nfold,
                   X_data, y_data, embedding_layer_max_index,
                   pretrained_embedding_matrix=None,
                   instance_data_source_tags=None,
                   accepted_ds_tags: list = None):
    """Fit a Keras model, run n-fold cross-validated prediction, and save scores.

    Args:
        dataset_name: name of the dataset (passed through to score saving).
        outfolder: output directory; a "models" subfolder is created inside it.
        model_descriptor: string describing the network architecture.
        cpus: unused here; kept for interface compatibility.
        nfold: number of cross-validation folds.
        X_data, y_data: features and labels.
        embedding_layer_max_index: vocabulary size for the embedding layer.
        pretrained_embedding_matrix: optional pre-trained embedding weights.
        instance_data_source_tags, accepted_ds_tags: forwarded to util.save_scores.
    """
    print("== Perform ANN ...")
    subfolder = outfolder + "/models"
    # Fix: replaced `os.stat` guarded by a bare `except:` (which swallowed
    # every error and raced with concurrent creation) with makedirs.
    os.makedirs(subfolder, exist_ok=True)
    # Bind the architecture arguments into the builder callable.
    create_model_with_args = \
        functools.partial(create_model, max_index=embedding_layer_max_index,
                          wemb_matrix=pretrained_embedding_matrix,
                          model_descriptor=model_descriptor)
    model = KerasClassifier(build_fn=create_model_with_args, verbose=0,
                            batch_size=100)
    # Fit once on the full data, then cross-validate (cross_val_predict
    # clones and refits the estimator per fold).
    model.fit(X_data, y_data)
    nfold_predictions = cross_val_predict(model, X_data, y_data, cv=nfold)
    util.save_scores(nfold_predictions, y_data, None, None, model_descriptor,
                     dataset_name, 3, outfolder, instance_data_source_tags,
                     accepted_ds_tags)
def run_exp_nn(X_train, y_train, X_val, y_val, param_name, param_range, other_params):
    """Sweep one hyperparameter of a Keras MLP and record train/val accuracy.

    Args:
        X_train, y_train: training features/labels (y_train is a DataFrame/Series).
        X_val, y_val: validation features/labels.
        param_name: name of the hyperparameter being swept.
        param_range: iterable of values to try for param_name.
        other_params: dict of fixed hyperparameters merged into each run.

    Returns:
        defaultdict(list) with per-run params, accuracies, and timings.
    """
    result = defaultdict(list)
    for param in param_range:
        # Free Keras graph state between runs.
        clear_session()
        result['param'].append(param)
        params = {param_name: param}
        params.update(other_params)
        result['params'].append(params)
        result['metrics'].append('accuracy')
        # Motions
        t0 = time.time()
        num_features = X_train.shape[1]
        print('num_features = {}'.format(num_features))

        def classification_model(n1=75, n2=14, n3=14, num_layers=3,
                                 input_dim=num_features, optimizer='adam',
                                 activation='sigmoid', epo=100, bat=44):
            model = Sequential()
            # Fix: the first layer previously hard-coded input_dim=64 and
            # ignored the input_dim argument; honor the parameter so data
            # with a different feature count works.
            model.add(Dense(n1, input_dim=input_dim))
            model.add(LeakyReLU())
            model.add(Dense(n2))
            model.add(LeakyReLU())
            for _ in range(num_layers - 2):
                model.add(Dense(n3))
                model.add(LeakyReLU())
            model.add(Dense(4, activation=activation))
            model.compile(optimizer=optimizer,
                          loss='sparse_categorical_crossentropy',
                          metrics=['accuracy'])
            return model

        model = KerasClassifier(build_fn=classification_model, verbose=0, **params)
        model.fit(X_train, y_train.values.ravel('C'))
        y_pred = model.predict(X_train)
        y_val_pred = model.predict(X_val)
        # Fix: training accuracy previously compared train-set predictions
        # against y_val (wrong labels, and a length mismatch in general).
        result['accuracy_m'].append(accuracy_score(y_train, y_pred))
        result['accuracy_val_m'].append(accuracy_score(y_val, y_val_pred))
        print("took {} seconds".format(time.time() - t0))
        result['time'].append(time.time() - t0)
    # NOTE(review): result['param_range'] is never populated, so plot_lines1
    # receives empty x values — confirm whether result['param'] was intended.
    plot_lines1(result['param_range'], result['time'], result['param'],
                result['param_range'], label='Motions', col='blue')
    return result
class NeuralNetworkClassificationModelFast(object):
    """
    Accepts SingleStockDataSet object as input and trains the benchmark model
    on train set and evaluate on test set.
    """

    def __init__(self, dset, random_state=16):
        self.dset = dset
        self.random_state = random_state
        # 80/10/10 train / validation / test split from the data set object.
        self.trainX, self.valX, self.testX, self.trainY, self.valY, self.testY = \
            self.dset.get_train_val_test_sets(0.8, 0.1, 0.1)
        self.predictions = None
        self.num_targets = self.trainY.shape[1]

    def build_nn_arch(self):
        """Build and compile the network (80-40-6, sigmoid output, SGD)."""
        input_dim = self.trainX.shape[1]
        num_classes = 6
        model = Sequential()
        # NOTE(review): init= and lr= are legacy Keras-1 argument names,
        # kept intentionally for the Keras version this code targets.
        model.add(Dense(80, input_dim=input_dim, init='normal', activation='relu'))
        model.add(Dense(40, init='normal', activation='sigmoid'))
        model.add(Dense(num_classes, init='normal', activation='sigmoid'))
        sgd = SGD(lr=0.4)
        model.compile(loss='binary_crossentropy', optimizer=sgd)
        return model

    def build_classifier(self):
        """Create the KerasClassifier wrapper with a fixed random seed."""
        # fix random seed for reproducibility
        np.random.seed(self.random_state)
        self.classifier = KerasClassifier(build_fn=self.build_nn_arch,
                                          nb_epoch=100, batch_size=8, verbose=0)

    def fit(self):
        """Fit the classifier on the second target column of the train split."""
        self.build_classifier()
        self.classifier.fit(self.trainX, self.trainY[:, 1])

    def predict(self):
        """Predict on the test split; caches and returns the predictions."""
        self.predictions = self.classifier.predict(self.testX)
        return self.predictions

    def score(self):
        """Return the f1 score of the cached predictions on the test split."""
        scr = f1_score(self.testY[:, 1], self.predictions)
        # Fix: was a Python 2 `print` statement; the function form works on
        # both Python 2 and 3.
        print('f1 score = %f' % scr)
        return scr

    def evaluate(self):
        """
        fits the model, predicts the targets and returns evaluation score
        """
        self.fit()
        self.predict()
        return self.score()

    def to_categorical(self, y):
        # convert integers to dummy variables (i.e. one hot encoded)
        return np_utils.to_categorical(y)
def train(model_dir, data_dir, train_steps):
    """Train a classifier on the Pima Indians diabetes data and save it.

    Args:
        model_dir: directory where the trained model.h5 is written.
        data_dir: directory containing pima-indians-diabetes.csv.
        train_steps: unused; kept for interface compatibility.
    """
    # fix random seed for reproducibility
    seed = 7
    numpy.random.seed(seed)
    # load pima indians dataset (skip the header row)
    dataset_path = os.path.join(data_dir, 'pima-indians-diabetes.csv')
    dataset = numpy.loadtxt(dataset_path, delimiter=",", skiprows=1)
    # split into input (X) and output (Y) variables
    X = dataset[:, 0:8]
    Y = dataset[:, 8]
    # create and fit the model
    model = KerasClassifier(build_fn=create_model, epochs=150, batch_size=10,
                            verbose=1)
    model.fit(X, Y)
    # persist the underlying Keras model
    model.model.save(os.path.join(model_dir, 'model.h5'))
    # Fix: was an f-string with no placeholders.
    print("Model summary after training:")
    print(model.model.summary())
def trainNNmodel(X, Y):
    """Train a dense Keras binary classifier on X/Y and return metrics.

    Returns:
        [accuracy, auc, precision, recall, f1, y_test, y_pred, y_pred_rt,
         fitted_keras_model] — the first five are stringified floats.
    """
    # Get the training and test data
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3)

    # Function to create the NN model, required for the wrapper
    def create_keras_model():
        model = Sequential()
        model.add(
            Dense(64, input_dim=X.shape[1], kernel_initializer='glorot_normal',
                  activation='relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.5))
        model.add(
            Dense(128, kernel_initializer='glorot_normal', activation='relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.5))
        model.add(
            Dense(16, kernel_initializer='glorot_normal', activation='relu'))
        model.add(Dense(1, activation='sigmoid'))
        model.compile(optimizer="adam", loss='binary_crossentropy',
                      metrics=['accuracy'])
        return model

    # Early stopping on training accuracy.
    # NOTE(review): older Keras versions log this metric as 'acc', not
    # 'accuracy' — confirm the target Keras version.
    early_stop = callbacks.EarlyStopping(monitor="accuracy", patience=50,
                                         mode='max')
    callbacks_list = [early_stop]
    # NOTE(review): epochs/batch_size/callbacks are passed both to the
    # wrapper constructor and again to fit() — the fit() values win.
    estimator = KerasClassifier(build_fn=create_keras_model, epochs=200,
                                batch_size=12, verbose=0,
                                callbacks=callbacks_list)
    estimator.fit(X_train, y_train, batch_size=12, epochs=200, verbose=1,
                  callbacks=callbacks_list)
    y_pred = estimator.predict(X_test)
    # flatten the (n, 1) prediction array
    y_pred = [item for sublist in y_pred for item in sublist]
    # positive-class probabilities for the ROC curve
    y_pred_rt = estimator.predict_proba(X_test)[:, 1]
    accuracy = str(accuracy_score(y_test, y_pred))
    fpr, tpr, thresholds = roc_curve(y_test, y_pred_rt)
    auc_value = str(auc(fpr, tpr))
    precision = str(precision_score(y_test, y_pred))
    recall = str(recall_score(y_test, y_pred))
    f1score = str(f1_score(y_test, y_pred, average="weighted"))
    return [
        accuracy, auc_value, precision, recall, f1score, y_test, y_pred,
        y_pred_rt, estimator.model
    ]
def feature_importance(create_model):
    """Fit a classifier built by *create_model* on the global training data and
    display eli5 permutation-importance weights for its features."""
    wrapped = KerasClassifier(build_fn=create_model, epochs=EPOCHS,
                              batch_size=5, verbose=0)
    wrapped.fit(X_train, Y_train)
    importances = PermutationImportance(wrapped, random_state=1).fit(X_train,
                                                                     Y_train)
    display(eli5.show_weights(importances, feature_names=featureNames))
def train_model(X_train, y_train):
    """Cross-validate and fit the classifier built by build_classifier.

    Returns:
        [fitted_classifier, cross_validation_accuracies]
    """
    clf = KerasClassifier(build_fn=build_classifier, batch_size=171, epochs=94)
    # 10 shuffled 80/20 splits for the accuracy estimate.
    splitter = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
    scores = cross_val_score(estimator=clf, X=X_train, y=y_train, cv=splitter,
                             verbose=2)
    # Final fit on the full training data.
    clf.fit(X_train, y_train, batch_size=171, epochs=94)
    return [clf, scores]
def model_probs(self, classifier=None):
    """Fit a classifier on the training split and return class probabilities
    for the test split.

    Args:
        classifier: optional pre-built estimator exposing fit/predict_proba.
            When omitted, a fresh KerasClassifier is built from
            self.model_build.

    Returns:
        Probability predictions on self.X_test.
    """
    # Fix: use an explicit None check — `if not classifier` would also
    # discard a legitimately supplied estimator whose truthiness happens to
    # be falsy (e.g. one implementing __len__).
    if classifier is None:
        classifier = KerasClassifier(build_fn=self.model_build, epochs=200,
                                     batch_size=5)
    classifier.fit(self.X_train, self.y_train)
    predictions = classifier.predict_proba(self.X_test)
    return predictions
class r_lgb_model(object):
    """K-fold Keras binary classifier with an AUC metric.

    (The name and the lgb_model attribute are historical — the underlying
    model is a Keras network, not LightGBM — and are kept for interface
    compatibility.)
    """

    def __init__(self, k, params):
        # k: number of stratified folds; params: model hyperparameters
        self.k = k
        self.skf = StratifiedKFold(n_splits=self.k, shuffle=True,
                                   random_state=42)
        self.params = params
        self.model_list = [0] * self.k

    def auroc(self, y_true, y_pred):
        """Keras-compatible AUC metric wrapping sklearn's roc_auc_score."""
        return tf.py_func(roc_auc_score, (y_true, y_pred), tf.double)

    def custom_loss(self, y_pred, y_true):
        """Recall-based custom eval in LightGBM's (name, value, is_higher_better)
        convention; predictions/labels are binarized at 0.5."""
        rs = recall_score(np.where(y_true >= 0.5, 1, 0),
                          np.where(y_pred >= 0.5, 1, 0))
        return 'custom loss', 1 / np.e**(np.log(rs)**2), True

    def create_baseline(self):
        """Build and compile the 64-256-1 sigmoid network (input width taken
        from self.dim, set in _fit)."""
        model = Sequential()
        model.add(Dense(64, input_dim=self.dim, kernel_initializer='normal',
                        activation='relu'))
        model.add(Dropout(0.2))
        model.add(Dense(256, kernel_initializer='normal', activation='relu'))
        model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer='adam',
                      metrics=[self.auroc])
        return model

    def _fit(self, X, y, verbose, esr, weights):
        """Fit the network on X/y with early stopping on validation AUC.

        verbose/esr/weights are currently unused; kept for interface
        compatibility.
        """
        self.dim = X.shape[1]
        callbacks = [EarlyStopping(monitor='val_auroc', min_delta=0.0,
                                   patience=1, verbose=0, mode='max',
                                   restore_best_weights=True)]
        self.lgb_model = KerasClassifier(build_fn=self.create_baseline,
                                         epochs=30, batch_size=1024, verbose=1,
                                         validation_split=0.20,
                                         callbacks=callbacks)
        # Fix: the original fit on X_train/y_train, which only existed in a
        # commented-out train_test_split — a guaranteed NameError. Fit on the
        # full X/y; validation_split=0.20 supplies the hold-out set.
        self.lgb_model.fit(X, y)
def perform_keras(name, mongo_host):
    """Run stratified k-fold training of a Keras baseline over the first 10k
    rows of the training data, printing per-fold and average log-loss.

    Args:
        name, mongo_host: unused here; kept for interface compatibility.
    """
    df = load_train_all_xgb().head(10000)
    del_trash_cols(df)
    blja_nan(df)
    folds = 5
    seed = 42
    skf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=seed)
    losses = []
    for big_ind, small_ind in skf.split(np.zeros(len(df)), df[TARGET]):
        big = df.iloc[big_ind]
        small = df.iloc[small_ind]
        # Fix: converted Python 2 `print x` statements to the function form,
        # which works on both Python 2 and 3.
        print(explore_target_ratio(big))
        print(explore_target_ratio(small))
        big, small = oversample(big, small, seed)
        print(explore_target_ratio(big))
        print(explore_target_ratio(small))
        train_target = big[TARGET]
        del big[TARGET]
        train_arr = big
        test_target = small[TARGET]
        del small[TARGET]
        test_arr = small
        # create_baseline reads the feature count from this module global
        global NUM_FETURES
        NUM_FETURES = len(train_arr.columns)
        # Scale on the training fold only, then apply to the held-out fold.
        scaller = StandardScaler()
        train_arr = scaller.fit_transform(train_arr)
        test_arr = scaller.transform(test_arr)
        estimator = KerasClassifier(build_fn=create_baseline, epochs=100,
                                    batch_size=5, verbose=10)
        print(len(train_arr))
        print(len(test_arr))
        estimator.fit(train_arr, train_target)
        proba = estimator.predict_proba(test_arr)
        loss = log_loss(test_target, proba)
        out_loss(loss)
        losses.append(loss)
    out_loss('avg = {}'.format(np.mean(losses)))
def evaluate(model, seed, x, y, t, ep, bz):
    """Fit a classifier built by *model* and report k-fold CV accuracy.

    Args:
        model: model-building callable for KerasClassifier.
        seed: RNG seed forwarded to the fold generator.
        x, y: features and labels.
        t: extra argument forwarded to k_fold.
        ep: number of training epochs.
        bz: batch size.
    """
    print('K-fold validation ... ', end='', flush=True)
    clf = KerasClassifier(build_fn=model, epochs=ep, batch_size=bz, verbose=0)
    clf.fit(x, y)
    scores = cross_val_score(clf, x, y, cv=k_fold(10, t, seed))
    print('%.2f%% (%.2f%%)' % (scores.mean() * 100, scores.std() * 100))
def testing(X_train=[], X_test=[], V_train=[], V_test=[], t_train=[],
            t_test=[], Y_train=[], Y_test=[], top_words=9444,
            max_review_length=1000, embedding_length=300, batch_size=128,
            nb_epoch=100, preset=None, option='lstm'):
    """Train the selected model type and return test-set metrics.

    Args:
        X_train/X_test: token-id sequences (padded to max_review_length).
        V_train/V_test, t_train/t_test: inputs for the 'classic' option.
        Y_train/Y_test: binary labels.
        preset: optional dict of extra KerasClassifier kwargs; copied per call.
        option: 'cnn', 'lstm', or 'classic'.

    Returns:
        dict with 'acc', 'f1', and 'auc' on the test set.
    """
    # Fix: `preset={}` was a mutable default argument — the same dict
    # persisted (and accumulated build settings) across calls. Copy per call.
    preset = {} if preset is None else dict(preset)
    X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
    X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)
    if option == 'cnn':
        preset.update({
            'build_fn': cnn_train,
            'top_words': top_words,
            'max_length': max_review_length,
            'embedding_length': embedding_length,
            'batch_size': batch_size,
            'nb_epoch': nb_epoch,
            'verbose': 1
        })
        model = KerasClassifier(**preset)
    elif option == 'lstm':
        preset.update({
            'build_fn': lstm_train,
            'top_words': top_words,
            'max_length': max_review_length,
            'embedding_length': embedding_length,
            'batch_size': batch_size,
            'nb_epoch': nb_epoch,
            'verbose': 1
        })
        model = KerasClassifier(**preset)
    else:
        print("ERROR AT TRAINING PHASE OF TESTING.")
    if option == 'cnn' or option == 'lstm':
        model.fit(X_train, Y_train)
    elif option == 'classic':
        # NOTE(review): no model is ever constructed for the 'classic'
        # option, so this branch raises NameError exactly as the original
        # did — confirm what estimator 'classic' was meant to use.
        model.fit(
            decay_norm(x=np.array(V_train),
                       t_stamps=t_train,
                       embedding_length=embedding_length,
                       max_review_length=max_review_length)[0], Y_train)
    predict = model.predict(X_test)
    acc = accuracy_score(Y_test, predict)
    f1 = f1_score(Y_test, predict)
    auc = roc_auc_score(Y_test, predict)
    return ({'acc': acc, 'f1': f1, 'auc': auc})
def _model_build(self, *arg):
    """Grid-search hyperparameters with a KerasClassifier wrapper, then
    rebuild and train a plain Keras model with the best parameters and
    store predictions/probabilities for analysis."""
    self._prepare_test_data()
    model = KerasClassifier(build_fn=self.create_model, verbose=0)
    # hyperparameter grid (nb_epoch/init are legacy Keras-1 names)
    optimizers = ['adam']
    init = ['normal', 'uniform']
    epochs = [100, 150]
    batches = [5, 10]
    param_grid = dict(optimizer=optimizers, nb_epoch=epochs,
                      batch_size=batches, init=init)
    grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
    grid_result = grid.fit(self.x_train, self.y_train)
    print("Best: %f using %s" %
          (grid_result.best_score_, grid_result.best_params_))
    # Training with Best Parameter: rebuild a raw Sequential model using the
    # winning init/optimizer/epochs/batch size.
    model = Sequential()
    # NOTE(review): input_dim=8 is hard-coded here while create_model is
    # opaque — confirm the grid-searched and rebuilt architectures match.
    model.add(Dense(12, input_dim=8, init=grid_result.best_params_['init'],
                    activation='relu'))
    model.add(Dense(8, init=grid_result.best_params_['init'],
                    activation='relu'))
    model.add(Dense(1, init=grid_result.best_params_['init'],
                    activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy',
                  optimizer=grid_result.best_params_['optimizer'],
                  metrics=['accuracy'])
    model.fit(self.x_train, self.y_train,
              nb_epoch=grid_result.best_params_['nb_epoch'],
              batch_size=grid_result.best_params_['batch_size'])
    # Threshold raw sigmoid outputs to class labels.
    yy_pred = model.predict(self.x_test)
    self.y_pred = [np.round(x) for x in yy_pred]
    self.y_true = self.y_test
    self.prob = model.predict_proba(self.x_test)
    self._analyse_result()
def SimpleLoss(individual, data, labels, layers, activation, *_):
    """Return 1 - test accuracy for a network parameterized by *individual*.

    Args:
        individual: sequence of (epochs, learning rate, dropout).
        data, labels: dicts with 'train' and 'test' arrays.
        layers, activation: architecture settings for CreateNeuralNetwork.
    """
    train_x = data['train']
    train_y = labels['train']
    # Binary problems arrive as a 1-D label vector; otherwise one column per class.
    output_size = 2 if len(train_y.shape) < 2 else train_y.shape[1]
    net = KerasClassifier(build_fn=CreateNeuralNetwork,
                          input_size=train_x.shape[1],
                          output_size=output_size,
                          layers=layers,
                          activation=activation,
                          lr=individual[1],
                          dropout=individual[2],
                          epochs=int(individual[0]),
                          verbose=0)
    net.fit(train_x, train_y)
    accuracy = net.score(data['test'], labels['test'])
    return 1 - accuracy
def test_keras_classifier():
    """Smoke-test the KerasClassifier wrapper: build a two-layer softmax
    network, fit it on the training data, and score it on the test data."""
    net = Sequential()
    for layer in (Dense(input_dim, input_shape=(input_dim,)),
                  Activation('relu'),
                  Dense(nb_class),
                  Activation('softmax')):
        net.add(layer)
    wrapper = KerasClassifier(net,
                              optimizer=optim,
                              loss=loss,
                              train_batch_size=batch_size,
                              test_batch_size=batch_size,
                              nb_epoch=nb_epoch)
    wrapper.fit(X_train, y_train)
    wrapper.score(X_test, y_test)
def main():
    """Train a Keras model on the Otto Group data, then bag it with sklearn's
    BaggingClassifier; prints scores along the way."""
    code_dir = '/home/john/git/kaggle/OttoGroup/'
    data_dir = '/home/john/data/otto/'
    training_file = 'train.csv'
    os.chdir(code_dir)
    # fixed seed for reproducibility
    np.random.seed(1337)
    print('Starting script...')
    print('Loading data...')
    X, labels = load_training_data(data_dir, training_file)
    print('Pre-processing...')
    scaler = create_scaler(X)
    X = apply_scaler(X, scaler)
    # y: integer labels; y_onehot: one-hot matrix; encoder: label encoder
    y, y_onehot, encoder = preprocess_labels(labels)
    num_features = X.shape[1]
    num_classes = y_onehot.shape[1]
    print('Features = ' + str(num_features))
    print('Classes = ' + str(num_classes))
    print('Building model...')
    model = define_model(num_features, num_classes)
    print('Complete.')
    print('Training model...')
    # wrapper is trained on one-hot labels; the bagging ensemble below is
    # trained on integer labels.
    wrapper = KerasClassifier(model)
    wrapper.fit(X, y_onehot, nb_epoch=20)
    print('Complete.')
    print('Training score = ' + str(wrapper.score(X, y_onehot)))
    preds = wrapper.predict(X)
    print('Predictions shape = ' + str(preds.shape))
    proba = wrapper.predict_proba(X)
    print('Probabilities shape = ' + str(proba.shape))
    print('Building ensemble...')
    ensemble = BaggingClassifier(wrapper, n_estimators=3, max_samples=1.0,
                                 max_features=1.0)
    print('Complete.')
    print('Training ensemble...')
    ensemble.fit(X, y)
    print('Complete.')
    print('Ensemble score = ' + str(ensemble.score(X, y)))
    print('Script complete.')
class Baseline(object):
    """Provide general machine learning models as baseline."""

    def __init__(self, train, valid, target, features, impute=True):
        """Store the datasets and optionally mean-impute missing values.

        Args:
            train: training DataFrame containing features and the target.
            valid: validation DataFrame with the same columns.
            target: name of the label column.
            features: list of feature column names.
            impute: when True, replace NaNs with column means before fitting.
        """
        super(Baseline, self).__init__()
        self.target = target
        self.features = features
        self.train = train
        self.valid = valid
        if impute:
            import pandas as pd
            from sklearn.preprocessing import Imputer
            self.train_prep = pd.DataFrame(Imputer(strategy='mean').fit_transform(self.train),
                                           columns=self.train.columns)
            self.valid_prep = pd.DataFrame(Imputer(strategy='mean').fit_transform(self.valid),
                                           columns=self.valid.columns)
        else:
            self.train_prep = self.train
            self.valid_prep = self.valid

    def LR(self, report=False):
        """Logistic Regression.

        Args:
            report: whether print out the model analysis report.
        Returns:
            Logistic regression model."""
        from sklearn.linear_model import LogisticRegression
        self.lr = LogisticRegression(n_jobs=-1)
        self.lr.fit(self.train_prep[self.features], self.train_prep[self.target])
        if report:
            from Report import Report
            rpt = Report(self.lr, self.train_prep, self.valid_prep, self.target, self.features)
            rpt.ALL()
        return self.lr

    def RF(self, report=False):
        """Random Forest.

        Args:
            report: whether print out the model analysis report.
        Returns:
            Decision tree model generated from Random Forest."""
        from sklearn.ensemble import RandomForestClassifier
        self.rf = RandomForestClassifier(n_estimators=1000, max_features='sqrt', max_depth=10,
                                         random_state=0, n_jobs=-1)
        self.rf.fit(self.train_prep[self.features], self.train_prep[self.target])
        if report:
            from Report import Report
            rpt = Report(self.rf, self.train_prep, self.valid_prep, self.target, self.features)
            rpt.ALL()
        return self.rf

    def GBDT(self, report=False):
        """Gradient Boosting Decision Tree (LightGBM).

        Args:
            report: whether print out the model analysis report.
        Returns:
            Booster trained with early stopping on a 20% holdout."""
        import lightgbm as lgb
        from sklearn.model_selection import train_test_split
        train, test = train_test_split(self.train, test_size=0.2, random_state=0)
        lgb_train = lgb.Dataset(train[self.features], train[self.target], free_raw_data=False)
        lgb_valid = lgb.Dataset(test[self.features], test[self.target], reference=lgb_train,
                                free_raw_data=False)
        params = {
            'boosting_type': 'gbdt',
            'objective': 'binary',  # FIX: was misspelled 'bianry', which LightGBM rejects
            'metric': 'auc',
            'num_leaves': 64,
            'learning_rate': 0.01,
            'feature_fraction': 0.75,
            'bagging_fraction': 0.75,
            'bagging_freq': 5,
            'verbose': 0
        }
        # FIX: lgb.train() expects `valid_sets` (a list) and `early_stopping_rounds`;
        # the previous keywords `valid_set` / `early_stopping_round` raise TypeError.
        self.gbdt = lgb.train(params, lgb_train, num_boost_round=10000,
                              valid_sets=[lgb_valid], early_stopping_rounds=200,
                              verbose_eval=100)
        if report:
            from Report import Report
            rpt = Report(self.gbdt, self.train, self.valid, self.target, self.features)
            rpt.ALL()
        return self.gbdt

    def NN(self, report=False):
        """Neutral Network.

        Args:
            report: whether print out the model analysis report.
        Returns:
            One layer neutral network model."""
        from keras.models import Sequential
        from keras.layers import Dense
        from keras.wrappers.scikit_learn import KerasClassifier

        def baseline_model():
            # One ReLU hidden layer of 8 units, sigmoid head for binary output.
            model = Sequential()
            model.add(Dense(8, input_dim=len(self.features), activation='relu'))
            model.add(Dense(1, activation='sigmoid'))
            model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
            return model

        self.nn = KerasClassifier(build_fn=baseline_model, epochs=5, batch_size=5, verbose=1)
        self.nn.fit(self.train[self.features], self.train[self.target])
        if report:
            from Report import Report
            rpt = Report(self.nn, self.train, self.valid, self.target, self.features)
            rpt.ALL()
        return self.nn
# create model model = Sequential() model.add(Dense(12, input_dim=34, init='uniform', activation='relu')) model.add(Dense(8, init='uniform', activation='relu')) model.add(Dense(1, init='uniform', activation='sigmoid')) # Compile model model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) return model # create model model = KerasClassifier(build_fn=create_model, nb_epoch=20, batch_size=32) # evaluate using 10-fold cross validation # kfold = KFold(n=len(features_train), n_folds=10, shuffle=True, random_state=seed) # results = cross_val_score(model, features_train.values, labels_train.values, cv=kfold) # print "Cross validation results:", (results.mean()*100), (results.std()*100) model.fit(features_train.values, labels_train.values) print "Model building complete:",round((time()-t0)/60,3),"m" # print len(np.unique(train.user_id)), len(np.unique(test.user_id)) # features_train, features_test, labels_train, labels_test = cross_validation.train_test_split(features_train, labels_train, test_size=0.60) # # neigh = neighbors.KNeighborsClassifier(weights='distance', n_jobs=-1).fit(train[features], train['hotel_cluster']) # forest = ensemble.RandomForestClassifier(n_estimators=10, n_jobs=-1).fit(train[features], train['hotel_cluster']) # # bayes = naive_bayes.GaussianNB().fit(train[features], train['hotel_cluster']) t0 = time() print "Predicting probabilities..." probs = pd.DataFrame(model.predict_proba(features_test.values, batch_size=32))
# scikit-learn wrapper test # ############################# print('Beginning scikit-learn wrapper test') print('Defining model') model = Sequential() model.add(Dense(784, 50)) model.add(Activation('relu')) model.add(Dense(50, 10)) model.add(Activation('softmax')) print('Creating wrapper') classifier = KerasClassifier(model) print('Fitting model') classifier.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch) print('Testing score function') score = classifier.score(X_train, Y_train) print('Score: ', score) print('Testing predict function') preds = classifier.predict(X_test) print('Preds.shape: ', preds.shape) print('Testing predict proba function') proba = classifier.predict_proba(X_test) print('Proba.shape: ', proba.shape) print('Testing get params') print(classifier.get_params())
model.add(Dense(9, init='normal', activation="softmax")) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) return model from keras.utils import np_utils from sklearn.preprocessing import LabelEncoder encoder = LabelEncoder() encoder.fit(train_y) encoded_y = encoder.transform(train_y) dummy_y = np_utils.to_categorical(encoded_y) print(dummy_y.shape) estimator = KerasClassifier(build_fn=baseline_model, nb_epochs=10, batch_size=64) estimator.fit(sentence_vectors[0:3321], dummy_y, validation_split=0.05) y_pred = estimator.predict_proba(sentence_vectors[3321:]) """ Submission """ submission = pd.DataFrame(y_pred) submission['id'] = test_index submission.columns = ['class1', 'class2', 'class3', 'class4', 'class5', 'class6', 'class7', 'class8', 'class9', 'id'] submission.to_csv("submission_keras_classify.csv",index=False)
class BaseKerasSklearnModel(base_model.BaseModel):
    '''
    Base model that trains a Keras network through the scikit-learn wrapper.

    Configuration arrives as keyword-argument groups:
      basic_params -- {'log_filename': ..., 'model_path': ...}
      load_data    -- {'method': callable, 'params': dict}; the callable must
                      return (dataset, X, Y, X_evaluation, Y_evaluation)
      create_model -- {'method': build_fn, 'params': dict} forwarded to
                      KerasClassifier
    '''

    def __init__(self, **kargs):
        '''
        Wire up logging, load the dataset via the injected loader and
        remember the model factory for init_model().
        '''
        import framework.tools.log as log
        self.kargs = kargs
        log_filename = self.kargs["basic_params"]["log_filename"]
        model_path = self.kargs["basic_params"]["model_path"]
        self.load_data_func = self.kargs["load_data"]["method"]
        self.create_model_func = self.kargs["create_model"]["method"]
        log.init_log(log_filename)
        (self.dataset, self.X, self.Y, self.X_evaluation, self.Y_evaluation) = \
            self.load_data_func(**self.kargs["load_data"]["params"])
        self.model_path = model_path
        self.dic_params = {}

    def load_data(self, data_file, delimiter, lst_x_keys, lst_y_keys):
        '''
        Load a delimited numeric text file and split its columns into X / Y.

        FIX: the `delimiter` argument was previously ignored — a "," was
        hard-coded in numpy.loadtxt, so any non-CSV file failed to parse.
        '''
        self.dataset = numpy.loadtxt(data_file, delimiter=delimiter)
        self.X = self.dataset[:, lst_x_keys]
        self.Y = self.dataset[:, lst_y_keys]

    def init_callbacks(self):
        '''
        Create the per-epoch checkpoint and loss-history callbacks and stage
        them in dic_params for the eventual fit() call.
        '''
        os.system("mkdir -p %s" % (self.model_path))
        checkpoint_callback = ModelCheckpoint(self.model_path + '/weights.{epoch:02d}-{acc:.2f}.hdf5', \
                monitor='acc', save_best_only=False)
        history_callback = LossHistory()
        self.dic_params["callbacks"] = [checkpoint_callback, history_callback]

    def init_model(self):
        '''
        Build the KerasClassifier from the injected factory and push the
        accumulated fit parameters (epochs, batch size, callbacks) into it.
        '''
        train_params = {"nb_epoch": 10, "batch_size": 10}
        self.dic_params.update(train_params)
        self.model = KerasClassifier(build_fn=self.create_model_func, **self.kargs["create_model"]["params"])
        self.model.set_params(**self.dic_params)

    def train_model(self):
        '''
        Fit on the training split and log the resubstitution accuracy and
        the per-epoch training history.
        '''
        seed = 7
        numpy.random.seed(seed)  # reproducible shuffling/initialisation
        history = self.model.fit(self.X, self.Y)
        scores = self.model.score(self.X, self.Y)
        logging.info("final : %.2f%%" % (scores * 100))
        logging.info(str(history.history))

    def process(self):
        '''
        Full pipeline: callbacks -> model -> training.
        '''
        self.init_callbacks()
        self.init_model()
        self.train_model()
# return the best three results
def top_n(matrix_prob, label_map):
    """For each probability row, return the labels of its three largest
    entries, ordered from most to least probable."""
    result = []
    for row in matrix_prob:
        order = sorted(range(len(row)), key=lambda i: row[i], reverse=True)
        result.append([label_map[i] for i in order[:3]])
    return result


# basic neural network model
def basic_model():
    """Build and compile the baseline dense classifier (100 -> 500 -> 42)."""
    net = Sequential()
    net.add(Dense(output_dim=500, input_dim=100, activation='relu'))
    net.add(Dropout(0.2))
    net.add(Dense(output_dim=42, input_dim=500, activation='softmax'))
    net.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return net


if __name__ == '__main__':
    X = pd.read_csv('./data/triple_train_x_mean.txt', header=None, encoding='utf-8')
    Y = pd.read_csv('./data/triple_train_y.txt', header=None, encoding='utf-8')
    X_test = pd.read_csv('./data/triple_test_x_mean.txt', header=None, encoding='utf-8')
    matrix_y = np_utils.to_categorical(Y, 42)

    # KerasClassifier analysis
    classifier = KerasClassifier(build_fn=basic_model, nb_epoch=10, batch_size=500)
    classifier.fit(X, Y)
    pred_prob = classifier.predict_proba(X_test)

    # Map the probability matrix to the top-3 label names per sample.
    with open('./model/task2_label_space.txt', encoding='utf-8') as flabel:
        label_map = flabel.read().split()
    pd.DataFrame(top_n(pred_prob, label_map)).to_csv('./data/task2_ans_int_index.txt', index=None, header=None, encoding='utf-8')
the number of epochs and the batch size. We pass the number of training epochs to the KerasClassifier, again using reasonable default values. Verbose output is also turned off given that the model will be created 10 times for the 10-fold cross validation being performed. """
# Rescale our data
# evaluate baseline model with standardized dataset
# NOTE(review): the narrative above says verbose output is turned off, but
# verbose=1 here enables it — confirm which is intended.
estimator = KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=1)
""" We are going to use scikit-learn to evaluate the model using stratified k-fold cross validation. This is a resampling technique that will provide an estimate of the performance of the model. It does this by splitting the data into k-parts, training the model on all parts except one which is held out as a test set to evaluate the performance of the model. This process is repeated k-times and the average score across all constructed models is used as a robust estimate of performance. It is stratified, meaning that it will look at the output values and attempt to balance the number of instances that belong to each class in the k-splits of the data. """
# 10-fold stratified CV; `seed`, `X`, `Y` and `encoded_Y` come from earlier in the file.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, encoded_Y, cv=kfold)
print("Results: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))
# Final resubstitution demo: fit on everything, predict the same data.
# NOTE(review): this fits on raw Y while cross-validation used encoded_Y —
# presumably both encode the same binary labels; verify upstream.
estimator.fit(X, Y)
prediction = estimator.predict(X)
print("Real: {}".format(Y))
print("Predicted: {}".format(prediction))
print(LRscore) # plt.plot(c,LRscore,'bx-') # plt.xlabel('penalty') # plt.ylabel('validation score') # plt.title('LR Model selection') # plt.show() # #logisticModel = LogisticRegression(penalty='l2') # #scores[1] = cross_val_score(logisticModel,train_data,label_data,cv=5) # #test model 3 : Neutral network #NNModel = MLPClassifier(solver='adam', alpha=1e-5,hidden_layer_sizes=(5000,100), random_state=1,max_iter=500) tbCallback = TensorBoard(log_dir='./Graph', histogram_freq=0, write_graph=True, write_images=True) NNModel = KerasClassifier(build_fn=create_model,epochs=1200, batch_size=150,verbose=0) cv = ShuffleSplit(n_splits=1, test_size=0.3, random_state=0) #NNscore = cross_val_score(NNModel,train_data,label_data,fit_params={'callbacks': [tbCallback]},cv=cv) NNModel.fit(train_data,label_data) prediction = NNModel.predict(test_data) prediction = np.array(prediction) print(prediction) np.savetxt("prediction.csv", prediction, delimiter=",") #print('MLPClassifier validation score : ',NNscore) #test model 4 : SVM # c = [1] # SVMscore = np.zeros(len(c)) # j = 0 # for i in c: # svmModel = SVC(C=i,kernel='linear') # SVMscore[j] = np.mean(cross_val_score(svmModel,train_data,label_data,cv=5)) # j = j+1
def baseline_model():
    """Build a single-hidden-layer softmax classifier over 10249 input features.

    Relies on the module-level `nb_classes`; returns a compiled Keras model
    suitable for KerasClassifier's build_fn.
    """
    model = Sequential()
    model.add(Dense(100, input_shape=(10249,)))
    model.add(Activation('relu'))
    # Heavy dropout — presumably to counter the very wide input layer; confirm.
    model.add(Dropout(0.7))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


# In[43]:
# nb_epoch / batch_size and the data matrices come from earlier notebook cells (off-view).
estimator = KerasClassifier(build_fn=baseline_model, nb_epoch=nb_epoch, batch_size=batch_size, verbose=1)
estimator.fit(X_train_mat, y_train_cat)


# In[44]:
predictions = estimator.predict(X_test_mat)
print(set(predictions))
# Map integer predictions back to the original label names.
print(encoder.inverse_transform(predictions))


# In[45]:
# Python 2 print statement — this notebook predates Python 3.
print 'macro f1:', f1_score(encoded_Y_test, predictions, average='macro')


# In[ ]:
class Baseline(object):
    """Provide general machine learning models as baseline."""

    def __init__(self, train, valid, target, features, impute=True):
        """Store the datasets and optionally mean-impute missing values.

        Args:
            train: training DataFrame with features and the target column.
            valid: validation DataFrame with the same columns.
            target: name of the label column.
            features: list of feature column names.
            impute: when True, replace NaNs with column means before fitting.
        """
        super(Baseline, self).__init__()
        self.target = target
        self.features = features
        self.train = train
        self.valid = valid
        if not impute:
            self.train_prep = self.train
            self.valid_prep = self.valid
        else:
            import pandas as pd
            from sklearn.preprocessing import Imputer
            # One fresh mean-imputer per frame, mirroring column names.
            prepared = [pd.DataFrame(Imputer(strategy='mean').fit_transform(frame),
                                     columns=frame.columns)
                        for frame in (self.train, self.valid)]
            self.train_prep, self.valid_prep = prepared

    def _report(self, model, train, valid):
        """Run the full model-analysis report for *model* on the given splits."""
        from Report import Report
        Report(model, train, valid, self.target, self.features).ALL()

    def LR(self, report=False):
        """Logistic Regression baseline.

        Args:
            report: whether print out the model analysis report.
        Returns:
            Fitted logistic regression model."""
        from sklearn.linear_model import LogisticRegression
        self.lr = LogisticRegression(n_jobs=-1)
        self.lr.fit(self.train_prep[self.features], self.train_prep[self.target])
        if report:
            self._report(self.lr, self.train_prep, self.valid_prep)
        return self.lr

    def RF(self, report=False):
        """Random Forest baseline.

        Args:
            report: whether print out the model analysis report.
        Returns:
            Fitted random forest model."""
        from sklearn.ensemble import RandomForestClassifier
        self.rf = RandomForestClassifier(n_estimators=1000, max_features='sqrt',
                                         max_depth=10, random_state=0, n_jobs=-1)
        self.rf.fit(self.train_prep[self.features], self.train_prep[self.target])
        if report:
            self._report(self.rf, self.train_prep, self.valid_prep)
        return self.rf

    def GBDT(self, report=False):
        """Gradient Boosting Decision Tree baseline (XGBoost).

        Args:
            report: whether print out the model analysis report.
        Returns:
            Fitted XGBClassifier."""
        from xgboost.sklearn import XGBClassifier
        self.gbdt = XGBClassifier(objective='binary:logistic', booster='gbtree',
                                  learning_rate=0.01, n_estimators=5000, max_depth=3,
                                  subsample=0.75, colsample_bytree=0.75,
                                  n_jobs=4, random_state=2018)
        self.gbdt.fit(self.train_prep[self.features], self.train_prep[self.target])
        if report:
            self._report(self.gbdt, self.train, self.valid)
        return self.gbdt

    def NN(self, report=False):
        """One-hidden-layer neural network baseline (Keras).

        Args:
            report: whether print out the model analysis report.
        Returns:
            Fitted KerasClassifier."""
        from keras.models import Sequential
        from keras.layers import Dense
        from keras.wrappers.scikit_learn import KerasClassifier

        def build():
            # 8-unit ReLU hidden layer, sigmoid head for binary output.
            net = Sequential()
            net.add(Dense(8, input_dim=len(self.features), activation='relu'))
            net.add(Dense(1, activation='sigmoid'))
            net.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
            return net

        self.nn = KerasClassifier(build_fn=build, epochs=5, batch_size=5, verbose=1)
        self.nn.fit(self.train[self.features], self.train[self.target])
        if report:
            self._report(self.nn, self.train, self.valid)
        return self.nn
optimizer='adam', metrics=['accuracy']) return model
# NOTE(review): the fragment above is the tail of a model-builder function
# (its `def` line lies before this excerpt); the `return model` belongs to it.

# Load the data --- (*2)
data = json.load(open("./newstext/data-mini.json"))
#data = json.load(open("./newstext/data.json"))
X = data["X"] # data representing the texts
Y = data["Y"] # category labels
# Take the maximum word count from the first sample vector
max_words = len(X[0])

# Training --- (*3)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y)
# One-hot encode the training labels for the softmax output.
Y_train = np_utils.to_categorical(Y_train, nb_classes)
print(len(X_train),len(Y_train))
model = KerasClassifier(
    build_fn=build_model,
    nb_epoch=nb_epoch,
    batch_size=batch_size)
model.fit(X_train, Y_train)

# Prediction --- (*4)
y = model.predict(X_test)
ac_score = metrics.accuracy_score(Y_test, y)
cl_report = metrics.classification_report(Y_test, y)
print("正解率=", ac_score)
print("レポート=\n", cl_report)
reasonable default values. Verbose output is also turned off given that the model will be created 10 times for the 10-fold cross validation being performed. """ # Rescale our data # evaluate baseline model with standardized dataset estimator = KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=1) """ We are going to use scikit-learn to evaluate the model using stratified k-fold cross validation. This is a resampling technique that will provide an estimate of the performance of the model. It does this by splitting the data into k-parts, training the model on all parts except one which is held out as a test set to evaluate the performance of the model. This process is repeated k-times and the average score across all constructed models is used as a robust estimate of performance. It is stratified, meaning that it will look at the output values and attempt to balance the number of instances that belong to each class in the k-splits of the data. """ kfold = StratifiedKFold(n_splits=1000, shuffle=True, random_state=seed) results = cross_val_score(estimator, X, encoded_Y, cv=kfold) print("Results: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100)) """ estimator.fit(X, Y) prediction = estimator.predict(X) print("Real: {}".format(Y)) print("Predicted: {}".format(prediction)) """
# Tail of a `create_baseline` builder whose `def` line lies above this excerpt;
# the bare `return model` below belongs to it.
model.add(Dense(1))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy', optimizer="adam", metrics=['accuracy'])
return model

#thank god for wrappers
def nn_model():
    # Factory returning a fresh wrapped classifier (20 epochs, batch 50, verbose on).
    return KerasClassifier(build_fn=create_baseline, nb_epoch=20, batch_size=50, verbose = 1)

model = KerasClassifier(build_fn=create_baseline, nb_epoch=10, batch_size=80, verbose = 0)
# NOTE(review): these fit() kwargs (nb_epoch=7, batch_size=300) override the
# constructor's values; show_accuracy is a Keras 0.x-only flag — confirm the
# installed Keras version accepts it.
model.fit(X_train, y_train, nb_epoch=7, batch_size=300, validation_split=0.1, show_accuracy=True)
# 5-fold accuracy estimate (uses the deprecated sklearn.cross_validation module).
scores = cross_validation.cross_val_score(model, X, y, cv = 5, scoring = "accuracy", n_jobs = -1, verbose = 1)
# Refit on the training split, then predict the held-out set.
model.fit(X_train, y_train, verbose=2)
y_pred = model.predict(X_test)
''' print y_pred print y_test print mean_squared_error(y_test, y_pred) '''
#scores = roc_auc_score(y_test,y_pred)
# Python 2 print statement: reports the cross-validation scores computed above.
print scores
#print f1_score(y_test, y_pred, average='macro')
optimizer='adam', metrics=['accuracy']) print(model.summary()) return model
# NOTE(review): the fragment above is the tail of `build_model` (its `def`
# line lies before this excerpt); the `return model` belongs to it.

# Generate the model (scikit-learn wrapper around build_model)
model = KerasClassifier(
    build_fn=build_model,
    nb_epoch=nb_epoch,
    batch_size=batch_size)

# Load the test data
data = json.load(open("./newstext/data-mini.json"))
X = data["X"]
Y = data["Y"]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y)
# One-hot encode the training labels for the softmax output.
Y_train = np_utils.to_categorical(Y_train, nb_classes)
print(len(X_train),len(Y_train))

# Training
model.fit(X_train, Y_train, verbose=1)
y = model.predict(X_test)
print(y)
ac_score = metrics.accuracy_score(Y_test, y)
cl_report = metrics.classification_report(Y_test, y)
print("正解率=", ac_score)
print("レポート=\n", cl_report)