def recognize_cnn(face, model_name, filepath='fitted_models/', ext='', return_name=True):
    people = pickle.load(open(filepath + 'ids_' + model_name + '.sav', 'rb'))
    X = face / 255
    X = X.reshape(1, 100, 100, 3)
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(100, 100, 3)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.15))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.35))
    model.add(Dense(len(people), activation='sigmoid'))
    model.load_weights(filepath + model_name + ext)
    if return_name:
        predictions = model.predict_proba(X)[0]
        return people[np.where(predictions == max(predictions))[0][0]]
    else:
        return model.predict(X)
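# A minimal call sketch for recognize_cnn (the model name and extension below
# are hypothetical; a real call needs the matching ids_<name>.sav pickle and
# weight file under fitted_models/).
import numpy as np

face = np.random.randint(0, 256, size=(100, 100, 3)).astype('float32')  # stand-in for a cropped face
print(recognize_cnn(face, 'cnn_v1', ext='.h5'))                    # -> best-matching name
raw = recognize_cnn(face, 'cnn_v1', ext='.h5', return_name=False)  # -> raw class scores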
class SimpleNN(BaseModel):
    def train(self, scale=True):
        # NOTE: scaling is currently always applied; the `scale` flag is unused.
        self.scaler.fit(self.x)
        self.x = self.scaler.transform(self.x)
        x_train, x_test, y_train, y_test = train_test_split(self.x, self.y, shuffle=True)
        callbacks = [EarlyStopping(monitor='val_loss', restore_best_weights=True)]
        self.model = Sequential()
        self.model.add(Dense(500, activation='relu', input_dim=x_train.shape[1]))
        self.model.add(Dropout(0.2))
        self.model.add(Dense(500, activation='relu'))
        self.model.add(Dense(1, activation='sigmoid'))
        self.model.compile(loss=keras.losses.binary_crossentropy,
                           optimizer=Adam(decay=0.001, amsgrad=True),
                           metrics=['acc'])
        self.model.fit(x_train, y_train,
                       validation_data=(x_test, y_test),
                       epochs=50000,
                       verbose=1,
                       callbacks=callbacks,
                       batch_size=256)

    def predict(self, test):
        test = self.scaler.transform(test)
        return self.model.predict_proba(test)
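# A hedged usage sketch. It assumes BaseModel's constructor stores x, y and an
# unfitted scaler such as sklearn's StandardScaler; that contract is not shown
# in the snippet, so the constructor signature below is hypothetical. With
# EarlyStopping's default patience of 0, the 50000-epoch budget is only an
# upper bound: training stops the first time val_loss fails to improve.
from sklearn.datasets import make_classification

x, y = make_classification(n_samples=2000, n_features=20, random_state=0)
clf = SimpleNN(x, y)   # hypothetical BaseModel signature
clf.train()
probs = clf.predict(x[:5])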
class F22Cnn():
    def fit(self, session_dir=None, X_train=None, y_train=None,
            X_test=None, y_test=None, epochs=0, batch_size=0):
        mean_px = X_train.mean().astype(np.float32)
        std_px = X_train.std().astype(np.float32)

        def standardize(x):
            return (x - mean_px) / std_px

        num_classes = len(set(y_train))
        print('num_classes: {}'.format(num_classes))
        self.clf = Sequential([
            Lambda(standardize, input_shape=(28, 28, 1)),
            Convolution2D(32, (3, 3), activation='relu'),
            BatchNormalization(axis=1),
            Convolution2D(32, (3, 3), activation='relu'),
            MaxPooling2D(),
            BatchNormalization(axis=1),
            Convolution2D(64, (3, 3), activation='relu'),
            BatchNormalization(axis=1),
            Convolution2D(64, (3, 3), activation='relu'),
            MaxPooling2D(),
            Flatten(),
            BatchNormalization(axis=1),
            Dense(512, activation='relu'),
            Dense(10, activation='softmax'),
        ])
        self.clf.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
        self.batch_size = batch_size
        self.clf, history = fit_cnn(self.clf, X_train, y_train, X_test, y_test,
                                    epochs, session_dir, self.batch_size)
        return history

    def score(self, X, y):
        y_cat = to_categorical(y)
        logging.info("Metric names: {}".format(self.clf.metrics_names))
        return self.clf.evaluate(X, y_cat, batch_size=self.batch_size)[1]

    def predict_proba(self, X):
        return self.clf.predict_proba(X)
def neural_network(x_tr, y_tr, x_te, y_te, dum=False, min_max=False):
    start = time.perf_counter()  # time.clock() was removed in Python 3.8
    if dum:
        x_tr = data_std(x_tr, min_max=False)
        x_te = data_std(x_te, min_max=False)
    y_tr_dm = data_propr(y_tr, name=False)
    y_te_dm = data_propr(y_te, name=False)
    init = initializers.glorot_uniform(seed=1)
    simple_adam = optimizers.Adam()
    model = Sequential()
    model.add(Dense(units=5, input_dim=x_te.shape[1], kernel_initializer=init, activation='relu'))
    model.add(Dropout(0.1))  # the dropout rate must lie in [0, 1); the original rate of 1 would zero out every unit
    model.add(Dense(units=6, kernel_initializer=init, activation='sigmoid'))
    model.add(Dropout(0.1))
    model.add(Dense(units=5, kernel_initializer=init, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer=simple_adam, metrics=['accuracy'])
    model.fit(x_tr, y_tr_dm, verbose=0,
              class_weight={0: 0.05, 1: 0.05, 2: 0.49, 3: 0.499, 4: 0.01})
    # return the basic structure of the model; sklearn metrics expect (y_true, y_pred)
    NN_class_repot = classification_report(y_te, model.predict_classes(x_te))
    NN_class_con = confusion_matrix(y_te, model.predict_classes(x_te))
    NN_class_pred = model.predict_classes(x_te)
    NN_class_pred_prob = model.predict_proba(x_te)
    # print the accuracy
    print('Neural network elapsed time:', end='--')
    print(model.evaluate(x_te, y_te_dm))
    # quick plot of the model's ROC curve
    poc_plt(y_te_dm, NN_class_pred_prob)
    end = time.perf_counter()
    # compute the model's elapsed time
    print('Neural network elapsed time: %f' % (end - start))
    return NN_class_repot, NN_class_con, NN_class_pred, NN_class_pred_prob, model
def recognize_lh(face, model_name, filepath='fitted_models/', ext='', return_name=True):
    people = pickle.load(open(filepath + 'ids_' + model_name + '.sav', 'rb'))
    X = face / 255
    X = X.reshape(1, 100, 100, 3)
    model = Sequential()
    # First convolutional layer, note the specification of shape
    model.add(Conv2D(96, kernel_size=(7, 7), activation='relu', input_shape=(100, 100, 3)))
    model.add(MaxPooling2D(pool_size=(3, 3)))
    model.add(Conv2D(256, (5, 5), activation='relu'))
    model.add(MaxPooling2D(pool_size=(3, 3)))
    model.add(Conv2D(384, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(3, 3)))
    # model.add(Dropout(0.1))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(len(people), activation='softmax'))
    model.load_weights(filepath + model_name + ext)
    if return_name:
        predictions = model.predict_proba(X)[0]
        return people[np.where(predictions == max(predictions))[0][0]]
    else:
        return model.predict(X)
class Model:
    def __init__(self, train_data, maxlen):
        self.train_data = train_data
        self.maxlen = maxlen
        self.embedding_model = None
        self.ml_model = None

    def load_data(self):
        """
        Loading the data into a dataframe
        Input
            path: path to the test data (String)
        Output
            train_data: returns a pandas DataFrame
        """
        print(self.train_data.head())

    # referenced from https://stackoverflow.com/questions/16645799/how-to-create-a-word-cloud-from-a-corpus-in-python
    def show_wordcloud(self, title=None):
        """
        Depicting word clouds of the input data
        Input
            data: input pandas DataFrame
        """
        stopwords = set(STOPWORDS)
        wordcloud = WordCloud(
            background_color='white',
            stopwords=stopwords,
            max_words=200,
            max_font_size=40,
            scale=3,
            random_state=1  # chosen at random by flipping a coin; it was heads
        ).generate(str(self.train_data))
        fig = plt.figure(1, figsize=(12, 12))
        plt.axis('off')
        if title:
            fig.suptitle(title, fontsize=20)
            fig.subplots_adjust(top=2.3)
        plt.imshow(wordcloud)
        plt.show()

    def transform_data(self):
        """
        Factorizing the simplified lithologies into numerical equivalents
        Input
            data: input pandas DataFrame
        Output
            tuple containing the transformed data
        """
        self.train_data['Lithology_original'] = self.train_data['Lithology_original'].replace(np.nan, '', regex=True)
        self.train_data['Lithology_original'] = self.train_data['Lithology_original'].apply(preprocessor)
        self.train_data['Simplified_lithology'] = self.train_data['Simplified_lithology'].replace(np.nan, 'Unknown', regex=True)
        self.train_data['Simplified_lithology'] = self.train_data['Simplified_lithology'].apply(preprocessor).astype(str)
        self.train_data['Simplified_lithology'], self.label = pd.factorize(self.train_data['Simplified_lithology'])
        self.list_of_descriptions = self.train_data['Lithology_original'].tolist()
        self.list_of_simple_lithology = self.train_data['Simplified_lithology'].tolist()

    def generate_embeddings(self):
        """
        Generating a FastText model (a vectorized version of each word) from the vocabulary in the data
        Input
            list_of_descriptions: transformed descriptions
            list_of_simple_lithology: transformed simple lithologies
        Output
            model: Gensim FastText model
        """
        data = []
        for x in self.list_of_descriptions:
            temp = []
            if isinstance(x, list):
                for y in x:
                    temp.append(y.lower())
            data.append(temp)
        for x in self.list_of_simple_lithology:
            temp = []
            if isinstance(x, list):
                for y in x:
                    temp.append(y.lower())
            data.append(temp)
            if isinstance(x, float):
                print(x)
        self.embedding_model = gensim.models.FastText(data, min_count=1, size=100, window=3)

    def split_data(self):
        """
        Splitting the data into train and test
        Input
            train_data: pandas DataFrame
        Output
            tuple containing train and test data
        """
        msk = np.random.rand(len(self.train_data)) < 0.75
        self.train_X = self.train_data.Lithology_original[msk]
        self.test_X = self.train_data.Lithology_original[~msk]
        y = self.train_data['Simplified_lithology']
        self.train_y = y[msk]
        self.test_y = y[~msk]

    def tokenize_input_data(self):
        """
        Indexing each token in the descriptions
        Input
            train_X: list of input descriptions
            test_X: list of input descriptions
        Output
            tuple containing indexed versions of the inputs
        """
        self.tokenizer = Tokenizer(num_words=3000)
        self.tokenizer.fit_on_texts(self.train_X)
        self.train_X = self.tokenizer.texts_to_sequences(self.train_X)
        self.test_X = self.tokenizer.texts_to_sequences(self.test_X)

    def label_to_id(self):
        """
        Indexing each label in the target (simplified lithology)
        Input
            train_y: list of labels
            test_y: list of labels
        Output
            tuple containing indexed versions of the input
        """
        self.train_y = utils.to_categorical(self.train_y.tolist(), 11, dtype='int')
        self.test_y = utils.to_categorical(self.test_y.tolist(), 11, dtype='int')

    def pad_sentences(self):
        """
        Adding padding to the descriptions so that each description is of the same length (maxlen)
        Input
            train_X: list of descriptions
            test_X: list of descriptions
            maxlen: int (maximum length of the descriptions)
        Output
            tuple containing transformed versions of the input
        """
        self.train_X = pad_sequences(self.train_X, padding='post', maxlen=self.maxlen)
        self.test_X = pad_sequences(self.test_X, padding='post', maxlen=self.maxlen)

    def create_embedding_matrix(self):
        """
        Creating an embedding matrix to be fed into the neural network
        Input
            model: gensim word2vec model
            embedding_matrix: matrix depicting the embeddings
        """
        self.embedding_matrix = np.zeros((len(self.embedding_model.wv.vocab), 100))
        for x, _ in self.embedding_model.wv.vocab.items():
            if x in self.tokenizer.word_counts.keys():
                self.embedding_matrix[self.tokenizer.word_index[x]] = np.array(
                    self.embedding_model.wv[x], dtype=np.float32)[:100]

    def define_learning_model(self):
        """
        Describing the deep learning model using Keras
        Input
            model: gensim word2vec model
            embedding_matrix: matrix of embeddings
            maxlen: maximum length of sentences
        Output
            lstm_model: deep learning model
        """
        self.ml_model = Sequential()
        self.ml_model.add(layers.Embedding(len(self.embedding_model.wv.vocab),
                                           100,
                                           weights=[self.embedding_matrix],
                                           input_length=self.maxlen,
                                           trainable=False))
        self.ml_model.add(layers.LSTM(100))
        # model.add(layers.Dropout(0.3))
        # model.add(layers.LSTM(100, activation='tanh', recurrent_activation='sigmoid'))
        self.ml_model.add(layers.Dropout(0.3))
        # model.add(layers.GlobalAveragePooling1D())
        self.ml_model.add(layers.Dense(11, activation='softmax'))
        # self.ml_model.add(layers.Softmax())
        # model.add(layers.Flatten())
        adam = optimizers.Adam(lr=0.001)
        self.ml_model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
        self.ml_model.summary()

    def calculate_accuracy(self):
        """
        Calculating the accuracy of the model.
        Input
            train_X: list of descriptions
            train_y: list of labels
        Output
            history: model after fitting the data
        """
        msk = np.random.rand(len(self.train_X)) < 0.75
        validation_data_X = self.train_X[~msk]
        validation_data_Y = self.train_y[~msk]
        self.history = self.ml_model.fit(self.train_X[msk], self.train_y[msk],
                                         epochs=10,
                                         verbose=2,
                                         validation_data=(validation_data_X, validation_data_Y))
        _, accuracy = self.ml_model.evaluate(self.train_X, self.train_y, verbose=False)
        print("Training Accuracy: {:.4f}".format(accuracy))
        _, accuracy = self.ml_model.evaluate(self.test_X, self.test_y, verbose=False)
        print("Testing Accuracy: {:.4f}".format(accuracy))

    # used as reference from https://www.tensorflow.org/tutorials/keras/basic_text_classification
    def plot_loss(self):
        """
        Plot the training and validation loss w.r.t. epochs
        Input
            model: deep learning model
        """
        history_dict = self.history.history
        history_dict.keys()
        loss = history_dict['loss']
        val_loss = history_dict['val_loss']
        epochs = range(1, len(loss) + 1)
        # "bo" is for "blue dot"
        plt.plot(epochs, loss, 'bo', label='Training loss')
        # b is for "solid blue line"
        plt.plot(epochs, val_loss, 'b', label='Validation loss')
        plt.title('Training and validation loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()
        plt.show()

    # used as reference from https://www.tensorflow.org/tutorials/keras/basic_text_classification
    def plot_accuracy(self):
        """
        Plot the training and validation accuracy w.r.t. epochs
        Input
            model: deep learning model
        """
        plt.clf()  # clear figure
        history_dict = self.history.history
        history_dict.keys()
        acc = history_dict['acc']
        val_acc = history_dict['val_acc']
        epochs = range(1, len(acc) + 1)
        plt.plot(epochs, acc, 'bo', label='Training acc')
        plt.plot(epochs, val_acc, 'b', label='Validation acc')
        plt.title('Training and validation accuracy')
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.legend()
        plt.show()

    def initialise_model(self):
        """
        Develop the model based on the input data
        """
        self.load_data()
        self.transform_data()
        self.generate_embeddings()
        self.split_data()
        self.tokenize_input_data()
        self.label_to_id()
        self.pad_sentences()
        self.create_embedding_matrix()
        self.define_learning_model()
        self.calculate_accuracy()

    def predict(self, data):
        """
        Predict simplified lithologies for input data
        """
        data['Description'] = data['Description'].replace(np.nan, '', regex=True)
        data['Description'] = data['Description'].astype(str)
        predict_X = self.tokenizer.texts_to_sequences(data['Description'])
        predict_X = pad_sequences(predict_X, padding='post', maxlen=self.maxlen)
        output = self.ml_model.predict_classes(predict_X)
        simplified_lithology = []
        for x in output:
            simplified_lithology.append(self.label[x])
        data['Simplified_Lithology'] = pd.Series(simplified_lithology)
        data.to_csv('prediction_file.csv', index=False)

    def predict_certainity(self, data):
        data['Description'] = data['Description'].replace(np.nan, '', regex=True)
        data['Description'] = data['Description'].astype(str)
        predict_X = self.tokenizer.texts_to_sequences(data['Description'])
        predict_X = pad_sequences(predict_X, padding='post', maxlen=self.maxlen)
        output = self.ml_model.predict_proba(predict_X)
        return output
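# A hedged end-to-end sketch of the class above (hypothetical CSV paths; the
# training file is assumed to carry 'Lithology_original' and
# 'Simplified_lithology' columns, and a preprocessor() helper is in scope).
df = pd.read_csv('lithology.csv')
m = Model(df, maxlen=100)
m.initialise_model()   # clean -> embed -> split -> train, printing accuracies
m.plot_loss()
m.plot_accuracy()
new_data = pd.read_csv('new_descriptions.csv')  # needs a 'Description' column
m.predict(new_data)    # writes prediction_file.csv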
          verbose=2)

# Testing the system on the test data
kayip_orani, dogruluk_orani = sinif.evaluate(Giris_test, Cikis_test, verbose=1, batch_size=10)
print(" ")
print("-------- Accuracy and loss of the test data fed to the trained system ----------")
print(" ")
print('test loss:', kayip_orani)
print('test accuracy:', dogruluk_orani)
print(" ")

# Classifying by predicting on the test data
Cikis_tahmin = sinif.predict_proba(Giris_test)
# print(Cikis_tahmin)
Cikis_tahmin = (Cikis_tahmin > 0.5)  # if the predicted output is above 0.5, treat the result as 1 (sick), otherwise 0 (healthy)

# ROC curve plot function
from sklearn.metrics import roc_curve, auc

def plot_roc(tahmin, Cikis):
    fpr, tpr, _ = roc_curve(Cikis, tahmin)
    roc_auc = auc(fpr, tpr)
    plt.figure()
    plt.plot(fpr, tpr, label='ROC curve (area=%0.2f)' % roc_auc)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
model.load_weights('fitted_models/levi-hassner_006_minloss')
while True:
    _, img = cap.read()
    gs = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gs, 1.1, 6)
    for (x, y, w, h) in faces:
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 5)
        face = img[y:y + h, x:x + w]
        face = cv2.resize(face, (100, 100))
        face = face.reshape(1, 100, 100, 3)
        # identity = recognize_lh(face, 'levi-hassner_006', ext='_minloss')
        predictions = model.predict_proba(face)[0]
        identity = people[np.where(predictions == max(predictions))[0][0]]
        cv2.putText(img, identity, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 5)
    cv2.imshow('img', img)
    k = cv2.waitKey(30) & 0xff
    if k == 27:
        break
cap.release()
    )
    checkpoint = ModelCheckpoint("keras_model.pt",
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='auto')
    model.fit(trainx, trainy,
              epochs=params['nb_epochs'],  # `nb_epoch` is the deprecated Keras 1 spelling
              batch_size=32,
              verbose=5,
              callbacks=[checkpoint, esp],
              validation_split=0.2)
    model.load_weights("keras_model.pt")
    pred_auc = model.predict_proba(testx, batch_size=64, verbose=0)
    # accuracy
    pred_auc = np.argmax(pred_auc, axis=1)
    acc = accuracy_score(testy_org, pred_auc)
    # mean squared error
    # acc = mean_squared_error(testy, pred_auc)

trials = Trials()
import time
start = time.time()
best = fmin(f_nn, space, algo=tpe.suggest, max_evals=5, trials=trials)
end = time.time()
print('\n\n\n')
print('time: ', end - start)
print('best: ')
print(np.sqrt(trials.losses()))
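# A minimal sketch of the two hyperopt pieces the fmin call above assumes:
# `space` describes the search space and `f_nn` trains/evaluates one sampled
# configuration and returns its loss. The keys and values are illustrative only.
from hyperopt import hp, STATUS_OK

space = {
    'nb_epochs': hp.choice('nb_epochs', [10, 20, 50]),
    'units': hp.choice('units', [64, 128, 256]),
}

def f_nn(params):
    # ... build and fit a model with `params`, compute a validation loss ...
    val_loss = 0.0  # placeholder
    return {'loss': val_loss, 'status': STATUS_OK}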
    bagging_fraction=0.75,
    bagging_freq=5,
    bagging_seed=7,
    feature_fraction=0.5,
    feature_fraction_seed=7,
    verbose=-1,
    min_data_in_leaf=80,
    min_sum_hessian_in_leaf=11
)

# Fit the data
classifier.fit(X_train, y_train)

# Make predictions on the hold-out data
y_pred = (classifier.predict_proba(X_test)[:, 1] >= 0.5).astype(int)

# Get the confusion matrix
print(confusion_matrix(y_test, y_pred))

# Get the accuracy score
print("Accuracy of {}".format(accuracy_score(y_test, y_pred)))

# Get the f1-Score
print("f1 score of {}".format(f1_score(y_test, y_pred)))

# Get the recall score
print("Recall score of {}".format(recall_score(y_test, y_pred)))

# Make predictions
predictions = (classifier.predict_proba(X_test_df)[:, 1] >= 0.5).astype(int)
class SemiSupLabeler():
    """
    @__init__: initialises the model
        - data_lab: labelled data
        - data_unlab: unlabelled data
        - data_submit: the submit version of the data
    """

    def __init__(self, data_lab, data_unlab, data_submit):
        # ------------------------- Default parameters -------------------------
        # NB: if some mandatory parameters are missing from the JSON, default values will be used.
        # List of all potential parameters:
        """
        @params_nn: parameters of the neural network
            - loss: loss used for the NN, cf. the dictionary above
            - optimizer: Adam, SGD, etc.
            - learning rate: speaks for itself
            - metrics: accuracy; we won't normally change it
            - decay: decay of the learning rate, generally of the order 1e-5
            - momentum: momentum of the lr
            - patience: number of epochs to wait, in early-stop mode, for the
              validation accuracy to increase again
            - layers: shape of the network
        """
        self.params_nn = [
            'loss', 'optimizer', 'learning rate', 'metrics', 'decay',
            'momentum', 'batch_size', 'number of epochs', 'layers', 'patience'
        ]
        """
        @params_ss: parameters of label spreading
            - manyfit: since the ss accuracy has some variance but is cheap to
              compute, manyfit is how many independent times we run it before
              averaging, to get a better estimate of the accuracy in question
            - ss_model: 'LabSpr' or 'LabProp'. So far, only LabSpr has converged
            - ss_kernel: 'knn' or 'rbf'. So far only knn converges.
              ***WATCH OUT***: when using rbf, euler will complain that you use too much memory!!
            - gamma: parameter for the rbf
            - neighbor: parameter for knn
            - alpha: parameter for knn and rbf; tells to what extent the
              information of the neighbors is taken into account
        """
        self.params_ss = [
            'UsingSS', 'manyfit', 'ss_model', 'ss_kernel', 'gamma',
            'neighbor', 'alpha'
        ]
        """
        @param_list: list of all parameters
            - Ratio: ratio represented by the training set
            - pca: number of principal components to use; if not present, no PCA will be done
            - UsingNN: if set to false, the NN is not used
            - data_state: 'save' or 'load'. If you want to train the NN only,
              without having to run the ss algo again, do one run with
              data_state set to 'save' and use data_state='load' for the next ones
            - scaler: 'normal' or 'standard'; describes the preprocessing before applying the PCA
            - paramsout: designates which parameters will be present in the output name
              ==> put the ones you're playing with there, in order to easily see the difference
        """
        self.param_list = [
            'Ratio', 'pca', 'UsingNN', 'paramsout', 'data_state', 'scaler'
        ] + self.params_nn + self.params_ss
        self.param_out = ['Ratio', 'pca', 'optimizer', 'layers']
        self.data_lab = data_lab
        self.data_unlab = data_unlab
        self.data_submit = data_submit

        # --------------------- DATA IF NO JSON PROVIDED --------------------------
        # Training:
        self.RATIO = 0.9
        self.INPUT_DIM = 139
        # PCA:
        self.scaler = 'Standard'
        self.PCA_MODE = True
        self.pca = 50
        # Early stopping:
        self.EARLY_STOP_MODE = False
        self.patience = 50
        # NEURAL NETWORK:
        self.USING_NN = True
        self.USING_SS = False
        assert (self.USING_NN or self.USING_SS)
        self.loss = "sparse_categorical_crossentropy"
        self.opt = "SGD"
        self.lr = 0.001
        self.metric = "accuracy"
        self.decay = 0
        self.momentum = 0
        self.batch_size = 32
        self.epochs = 5
        self.lay_node = [("relu", 206), ('dropout', 0.33)]
        # Semi-supervised learning:
        self.datastate = 'save'
        self.ss_mod = 'LabSpr'
        self.ss_kern = 'knn'
        self.gamma = 20
        self.neighbors = 7
        self.alpha = 0.2
        self.manyfit = 1

        # ----------------------- JSON AS ARGUMENT: -------------------------
        # Checks whether the provided JSON is well formed:
        def check(inner, outer):
            for i in inner:
                if not (i in outer):
                    print('unknown parameter. abort.', i)
                    exit()

        self.JSON_MODE = (len(sys.argv) > 1)
        # In case a JSON was provided for the parameters:
        if (self.JSON_MODE):
            fn = sys.argv[1]
            if os.path.isfile(fn):
                print("successfully read the json file: " + sys.argv[1])
                self.json_dict = json.load(open(fn))
                assert ('UsingNN' in self.json_dict and 'paramsout' in self.json_dict)
                self.USING_NN = self.json_dict['UsingNN']
                self.USING_SS = self.json_dict['UsingSS']
                check(self.json_dict, self.param_list)
                check(self.json_dict['paramsout'], self.param_list)
                # iterate over the printed parameters and ensure they exist:
                self.param_out = self.json_dict['paramsout']
                self.RATIO = self.json_dict['Ratio']
                self.ss_mod = self.json_dict['ss_model']
                self.ss_kern = self.json_dict['ss_kernel']
                self.gamma = self.json_dict['gamma']
                self.neighbors = self.json_dict['neighbor']
                self.alpha = self.json_dict['alpha']
                self.datastate = self.json_dict['data_state']
                self.scaler = self.json_dict['scaler']
                if ('manyfit' in self.json_dict):
                    self.manyfit = self.json_dict['manyfit']
                if (self.USING_NN):
                    self.loss = self.json_dict['loss']
                    self.opt = self.json_dict['optimizer']
                    self.lr = self.json_dict['learning rate']
                    self.metric = self.json_dict['metrics']
                    self.decay = self.json_dict['decay']
                    self.momentum = self.json_dict['momentum']
                    self.batch_size = self.json_dict['batch_size']
                    self.epochs = self.json_dict['number of epochs']
                    self.lay_node = self.json_dict['layers']
                self.PCA_MODE = ('pca' in self.json_dict)
                if (self.PCA_MODE):
                    self.pca = self.json_dict['pca']
                    self.INPUT_DIM = self.pca
                self.EARLY_STOP_MODE = ('patience' in self.json_dict)
                if (self.EARLY_STOP_MODE):
                    self.patience = self.json_dict['patience']
            else:
                print("incorrect path. abort.")
                print(sys.argv[1])
                exit()
        # if no JSON is provided, the values are taken from the code:
        else:
            print("taking the values of the code because no JSON was given.")
            # Dictionary of all the values of parameters used:
            self.json_dict = {
                'Ratio': self.RATIO,
                'UsingNN': self.USING_NN,
                'UsingSS': self.USING_SS,
                'ss_model': self.ss_mod,
                'ss_kernel': self.ss_kern,
                'loss': self.loss,
                'optimizer': self.opt,
                'learning rate': self.lr,
                'metrics': self.metric,
                'decay': self.decay,
                'momentum': self.momentum,
                'batch_size': self.batch_size,
                'number of epochs': self.epochs,
                'gamma': self.gamma,
                'neighbor': self.neighbors,
                'alpha': self.alpha,
                'layers': self.lay_node,
                'manyfit': self.manyfit,
                'scaler': self.scaler
            }
            if (self.PCA_MODE):
                self.json_dict['pca'] = self.pca
                self.INPUT_DIM = self.pca
            if (self.EARLY_STOP_MODE):
                self.json_dict['patience'] = self.patience

        self.build_output_name()
        # Tensorboard/log part:
        self.logs_base_dir = "./logs"
        os.makedirs(self.logs_base_dir, exist_ok=True)
        self.log_spec = os.path.join(self.logs_base_dir, self.output_name)
        os.makedirs(self.log_spec, exist_ok=True)
        self.init_variables()

    """
    @label_spr: performs label spreading
    """

    def label_spr(self):
        RESULT_ACC_SS = 0
        for i in range(self.manyfit):
            # Initialising of variables:
            self.init_variables()
            # PCA preprocessing:
            if (self.PCA_MODE):
                self.pca_preprocess(self.pca)
            # Semi-supervised algo
            if (self.ss_mod == 'LabSpr' and self.ss_kern == 'knn'):
                self.label_prop_model = LabelSpreading(kernel='knn',
                                                       gamma=self.gamma,
                                                       n_neighbors=self.neighbors,
                                                       alpha=self.alpha)
            elif (self.ss_mod == 'LabProp' and self.ss_kern == 'rbf'):
                self.label_prop_model = LabelPropagation(kernel='rbf',
                                                         gamma=self.gamma,
                                                         n_neighbors=self.neighbors,
                                                         alpha=self.alpha,
                                                         max_iter=10)
            else:
                self.label_prop_model = LabelPropagation(kernel=self.ss_kern,
                                                         gamma=self.gamma,
                                                         n_neighbors=self.neighbors)
            print('Starting to fit. Run for shelter!')
            self.label_prop_model.fit(self.X_tot, self.y_tot)
            temp_acc = self.label_prop_model.score(self.X_valid_lab, self.y_valid)
            print('{} / {} :accuracy = {}'.format(i, self.manyfit, temp_acc))
            RESULT_ACC_SS += temp_acc
            self.y_tot = self.label_prop_model.transduction_
            self.y_submit = self.label_prop_model.predict(self.X_submit)
            if (self.datastate == "save"):
                self.save_to_csv(self.X_tot, self.y_tot, self.X_valid_lab, self.y_valid)
        RESULT_ACC_SS /= self.manyfit
        self.json_dict['ss_accuracy'] = RESULT_ACC_SS
        print('accuracy obtained on the test set of the ss algo:', RESULT_ACC_SS)

    """
    @labelspr_predict: returns the prediction of the label spreading
    """

    def labelspr_predict(self, X):
        return self.label_prop_model.predict(X)

    """
    @init_variables: transforms the input data so that it is usable
    """

    def init_variables(self):
        X_submit = self.data_submit.to_numpy()
        X_big_lab = (self.data_lab.to_numpy())[:, 1:]
        y_big = ((self.data_lab.to_numpy())[:, 0]).astype(int)
        X_train_lab, X_valid_lab, self.y_train, self.y_valid = train_test_split(
            X_big_lab, y_big, test_size=(1 - self.RATIO), random_state=14)
        X_unlab = self.data_unlab.to_numpy()
        X_tot = np.concatenate((X_train_lab, X_unlab), axis=0)
        self.y_tot = np.concatenate((self.y_train, np.full(len(X_unlab), -1)))
        if (self.scaler == 'Standard'):
            scaler = StandardScaler()
        elif (self.scaler == 'Normal'):
            scaler = Normalizer()
        else:
            scaler = StandardScaler()
        self.X_tot = scaler.fit_transform(X_tot)
        self.X_train_lab = scaler.transform(X_train_lab)
        self.X_unlab = scaler.transform(X_unlab)
        self.X_valid_lab = scaler.transform(X_valid_lab)
        self.X_submit = scaler.transform(X_submit)

    """@pca_preprocess: performs the preprocessing before the PCA """

    def pca_preprocess(self, number):
        pca_mod = PCA(n_components=number)
        self.X_tot = pca_mod.fit_transform(self.X_tot)
        self.X_train_lab = pca_mod.transform(self.X_train_lab)
        self.X_unlab = pca_mod.transform(self.X_unlab)
        self.X_valid_lab = pca_mod.transform(self.X_valid_lab)
        self.X_submit = pca_mod.transform(self.X_submit)
        self.INPUT_DIM = number

    """@build_model: creates the model of the neural network """

    def build_model(self):
        self.model = Sequential()
        for counter, (name, num) in enumerate(self.lay_node):
            if (counter == 0):
                self.model.add(Dense(num, activation='relu', input_dim=self.INPUT_DIM))
            elif (name == 'dropout'):
                self.model.add(Dropout(rate=num))
            elif (name == 'relu'):
                self.model.add(Dense(num, activation=tf.nn.relu))
            elif (name == 'relu_bn'):
                self.model.add(Dense(num))
                self.model.add(BatchNormalization())
                self.model.add(Activation('relu'))
            else:
                print('incorrect name for the layers. exit.')
                exit()
        # Last layer of the neural network:
        self.model.add(Dense(10, activation='softmax'))
        # optimizer
        if (self.opt == 'SGD'):
            optimiz = SGD(lr=self.lr, decay=self.decay, momentum=self.momentum)
        elif (self.opt == 'Adam'):
            optimiz = Adam(lr=self.lr, decay=self.decay)
        else:
            print('incorrect name for the optimizer. exit.')
            exit()
        self.model.compile(optimizer=optimiz, loss=self.loss, metrics=[self.metric])

    """ @fit_lab: trains the neural network on labeled data """

    def fit_lab(self):
        temp = self.nn_fit(self.X_train_lab, self.y_train)
        self.json_dict["small_lab_dataset_nn_acc"] = temp

    """ @fit_tot: trains the neural network on the total data """

    def fit_tot(self):
        temp = self.nn_fit(self.X_tot, self.y_tot)
        self.json_dict["big_dataset_nn_acc"] = temp

    def fit_tot_mesh(self):
        tableau = []
        tabl = []
        number_it = 10
        temp = self.nn_fit(self.X_tot, self.y_tot)
        for i in range(number_it):
            probabs_values = self.model.predict(self.X_submit)
            tableau.append(probabs_values)  # append the predictions, not the probas_values method
        tabl = [(sum(x) / number_it) for x in zip(*tableau)]
        self.y_submit = np.array([np.argmax(i) for i in tabl])

    """
    @nn_fit: fits the neural network to the input data X and y provided.
    """

    def nn_fit(self, X, y):
        call_back_list = []
        # call_back_list.append(keras.callbacks.TensorBoard(self.log_spec, histogram_freq=1, write_grads=True))
        if (self.EARLY_STOP_MODE):
            call_back_list.append(
                EarlyStopping(patience=self.patience,
                              verbose=1,
                              mode='min',
                              restore_best_weights=True))
        self.model.fit(x=X,
                       y=y,
                       epochs=self.epochs,
                       batch_size=self.batch_size,
                       validation_data=(self.X_valid_lab, self.y_valid))
        test_loss, aut_acc = self.model.evaluate(self.X_valid_lab, self.y_valid)
        y_temp = self.model.predict(self.X_submit)
        self.y_submit = np.array([np.argmax(i) for i in y_temp])
        return aut_acc

    """
    @complete_unlab: completes the unlabeled data by predicting the labels for it
    """

    def complete_unlab(self):
        y_missing = self.model.predict(self.X_unlab)
        y_missing = np.array([np.argmax(i) for i in y_missing])
        self.X_tot = np.concatenate((self.X_train_lab, self.X_unlab), axis=0)
        self.y_tot = np.concatenate((self.y_train, y_missing), axis=0)

    def probas_values(self):
        temp = self.nn_fit(self.X_train_lab, self.y_train)
        probas_val = self.model.predict_proba(self.X_unlab)
        return probas_val

    def mesh(self):
        tableau = []
        number_it = 20
        tabl = []
        for i in range(number_it):
            tableau.append(self.probas_values())
        tabl = [(sum(x) / number_it) for x in zip(*tableau)]
        # print((tabl[0]))
        predict = []
        """
        for x in tabl:
            for j in range(len(x)):
                if max(x) == x[j]:
                    predict.append(j)
        """
        # print(predict[0])
        predict = [np.argmax(i) for i in tabl]
        y_missing = predict
        self.X_tot = np.concatenate((self.X_train_lab, self.X_unlab), axis=0)
        self.y_tot = np.concatenate((self.y_train, y_missing), axis=0)

    def filtered_mesh(self):
        tableau = []
        number_it = 10
        tabl = []
        for i in range(number_it):
            tableau.append(self.probas_values())
        tabl = [(sum(x) / number_it) for x in zip(*tableau)]
        THRESHOLD_PROBAS = 0.7
        # If the maximum probability is below the threshold:
        truncated_tabl = [i for i in tabl if max(i) > THRESHOLD_PROBAS]
        print(len(truncated_tabl))
        # Find the indices of those points in order to remove them from the unlabeled set.
        indices = []
        for i in range(len(tabl)):
            if max(tabl[i]) <= THRESHOLD_PROBAS:
                indices.append(i)
        self.X_unlab_truncated = np.delete(self.X_unlab, indices, axis=0)
        print(len(self.X_unlab_truncated))
        # print((tabl[0]))
        # print(predict[0])
        # Only make predictions for the points above the threshold:
        predict = [np.argmax(i) for i in truncated_tabl]
        y_missing = predict
        self.X_tot = np.concatenate((self.X_train_lab, self.X_unlab_truncated), axis=0)
        self.y_tot = np.concatenate((self.y_train, y_missing), axis=0)

    """
    @build_output_name: provides the name of the output with all the parameters
    """

    def build_output_name(self):
        self.output_name = (datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
        if (self.JSON_MODE):
            if ('origin' in os.path.basename(os.path.normpath(sys.argv[1]))):
                self.output_name += '_OR_'
        nn_string = 'NN:'
        ss_string = 'SS:'
        for i in self.param_out:
            temp = (i + '=' + str(self.json_dict[i]))
            if (i in self.params_nn):
                nn_string += temp
            elif (i in self.params_ss):
                ss_string += temp
            else:
                self.output_name += temp
        self.output_name += ss_string
        if (self.USING_NN):
            self.output_name += nn_string

    """
    @submission_formed: provides the right format for the submission file
        - predicted_y: the predicted values
        - name: name containing all parameters
    """

    def submission_formed(self, predicted_y, name):
        result_dir = "./results"
        os.makedirs(result_dir, exist_ok=True)
        out = pd.DataFrame(predicted_y)
        out.insert(0, 'Id', range(30000, len(out) + 30000))
        out.rename(columns={"Id": "Id", 0: "y"}, inplace=True)
        path = 'results/' + name + '.csv'
        out.to_csv(os.path.join(path), index=False)

    """
    @save_to_csv: useful when self.datastate is set to 'save'; saves the data
    obtained after the ss algorithm
    """

    def save_to_csv(self, X_tot, y_tot, X_valid, y_valid):
        out_x = pd.DataFrame(X_tot)
        out_y = pd.DataFrame(y_tot)
        out_xv = pd.DataFrame(X_valid)
        out_yv = pd.DataFrame(y_valid)
        os.makedirs('./saved_datas', exist_ok=True)
        path_x = 'saved_datas/X_tot.csv'
        path_y = 'saved_datas/y_tot.csv'
        path_xv = 'saved_datas/X_valid.csv'
        path_yv = 'saved_datas/y_valid.csv'
        out_x.to_csv(os.path.join(path_x), index=False)
        out_y.to_csv(os.path.join(path_y), index=False)
        out_xv.to_csv(os.path.join(path_xv), index=False)
        out_yv.to_csv(os.path.join(path_yv), index=False)

    """
    @load_xy: when self.datastate is set to 'load', loads data from saved data
    """

    def load_xy(self):
        print('Loading the X and y...')
        self.X_valid_lab = (pd.read_csv('saved_datas/X_valid.csv')).to_numpy()
        self.y_valid = (pd.read_csv('saved_datas/y_valid.csv')).to_numpy()
        self.X_tot = (pd.read_csv('saved_datas/X_tot.csv')).to_numpy()
        self.y_tot = (pd.read_csv('saved_datas/y_tot.csv')).to_numpy()

    """@out: final output of the program """

    def out(self):
        self.submission_formed(self.y_submit, self.output_name)
        with open(self.log_spec + '/recap.json', 'w') as fp:
            json.dump(self.json_dict, fp, indent=1)
        print('######################################## DONE ##################################')
        print("\n")
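# A hedged driver sketch for SemiSupLabeler (the three DataFrames are assumed
# to be loaded elsewhere; a JSON config may be passed as sys.argv[1], otherwise
# the in-code defaults apply).
labeler = SemiSupLabeler(data_lab, data_unlab, data_submit)
if labeler.USING_SS:
    labeler.label_spr()     # spread labels over labeled + unlabeled data
if labeler.USING_NN:
    labeler.build_model()
    labeler.fit_tot()       # train the NN on the completed dataset
labeler.out()               # writes results/<name>.csv and logs/<name>/recap.json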
class YApplyTimeSeries(object):
    def __init__(self):
        # data prepare
        self.__df = None
        self.__train_feature_label, self.__test_feature_label = None, None
        self.__train_feature, self.__train_label = None, None
        self.__test_feature, self.__test_label = None, None
        self.__mms = None
        # function set
        self.__net = None
        # optimizer function
        # pick the best function

    def data_prepare(self):
        self.__df = pd.read_csv("C:\\Users\\Dell\\Desktop\\time_series.csv", encoding="utf-16")
        self.__df = self.__df.dropna()
        self.__train_feature_label = self.__df.loc[(self.__df["is_oot"] == 0), :]
        self.__test_feature_label = self.__df.loc[(self.__df["is_oot"] == 1), :]
        self.__train_feature_label = self.__train_feature_label.drop(["id_no", "is_oot"], axis=1)
        self.__test_feature_label = self.__test_feature_label.drop(["id_no", "is_oot"], axis=1)
        self.__train_feature = self.__train_feature_label[[
            i for i in self.__train_feature_label.columns if i != "is_overdue"
        ]].values
        self.__train_label = self.__train_feature_label["is_overdue"].values
        self.__test_feature = self.__test_feature_label[[
            i for i in self.__test_feature_label.columns if i != "is_overdue"
        ]].values
        self.__test_label = self.__test_feature_label["is_overdue"].values
        # standardization (min-max scaling)
        self.__mms = MinMaxScaler()
        self.__mms.fit(self.__train_feature)
        self.__train_feature = self.__mms.transform(self.__train_feature)
        self.__test_feature = self.__mms.transform(self.__test_feature)
        # reshape samples × input_length × input_dim
        self.__train_feature = self.__train_feature.reshape((-1, 5, 3))
        self.__test_feature = self.__test_feature.reshape((-1, 5, 3))

    def function_set(self):
        self.__net = Sequential()
        self.__net.add(GRU(units=5, input_length=5, input_dim=3))
        self.__net.add(Dense(units=1, activation="sigmoid"))

    def optimizer_function(self):
        self.__net.summary()
        self.__net.compile(loss=keras.losses.binary_crossentropy,
                           optimizer=keras.optimizers.Adam(),
                           metrics=["accuracy"])

    def pick_the_best_function(self):
        self.__net.fit(self.__train_feature, self.__train_label, epochs=2, batch_size=256)
        print(roc_auc_score(self.__test_label,
                            self.__net.predict_proba(self.__test_feature)))
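# A hedged driver for the pipeline above (the CSV path hardcoded inside
# data_prepare() must exist for this to run).
ts = YApplyTimeSeries()
ts.data_prepare()             # load, split on is_oot, scale, reshape to (N, 5, 3)
ts.function_set()             # GRU(5) -> Dense(1, sigmoid)
ts.optimizer_function()       # compile with Adam + binary cross-entropy
ts.pick_the_best_function()   # fit 2 epochs and print test AUC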
class DeepLearning:
    def __init__(self, x_shape):
        from keras import Sequential
        from keras.callbacks import EarlyStopping
        from keras.layers import Dense, Dropout
        from keras.regularizers import l1_l2

        self.early_stopping = EarlyStopping(
            monitor='val_loss',
            min_delta=0,
            patience=50,
            mode='min',
            verbose=1,
        )
        self.classifier = Sequential()
        self.classifier.add(Dense(300, kernel_initializer="he_normal",
                                  activation="elu", input_dim=x_shape))
        self.classifier.add(Dropout(0.3))
        self.classifier.add(Dense(450, kernel_initializer='he_normal', activation='elu'))
        self.classifier.add(Dropout(0.3))
        self.classifier.add(Dense(100, kernel_initializer='he_normal', activation='elu'))
        self.classifier.add(Dropout(0.3))
        self.classifier.add(Dense(20, kernel_initializer='he_normal',
                                  activation='elu', kernel_regularizer=l1_l2()))
        self.classifier.add(Dense(1, kernel_initializer='uniform',
                                  activation="sigmoid",
                                  activity_regularizer=l1_l2(0.005, 0.005)))
        self.classifier.compile(loss='binary_crossentropy',
                                optimizer='adam',
                                metrics=['accuracy'])

    # TODO: implementation using keras
    def learn(self, x_train, y_train, x_test, batch_size=10, is_stand_ml=False, is_al=False):
        self.classifier.fit(x_train, y_train,
                            validation_split=0.1,
                            callbacks=[self.early_stopping],
                            epochs=100,
                            batch_size=64,
                            verbose=0)
        probs = self.classifier.predict_proba(x_test)
        if is_stand_ml:
            return probs
        if is_al:
            certainty = [abs(a - 0.5) for a in probs]
            return np.argpartition(certainty, -batch_size)[-batch_size:]
        return np.argpartition(probs[:, 1], -batch_size)[-batch_size:]
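# A hedged usage sketch of the three return modes (x_pool is a hypothetical
# unlabeled matrix with the same number of columns as x_train). Note that the
# is_al branch ranks by |p - 0.5| and argpartition keeps the LARGEST values,
# i.e. it returns the indices the model is most certain about, and that the
# default branch indexes probs[:, 1], which presumes a two-column output.
dl = DeepLearning(x_shape=x_train.shape[1])
probs = dl.learn(x_train, y_train, x_pool, is_stand_ml=True)           # raw probabilities
sure = dl.learn(x_train, y_train, x_pool, batch_size=10, is_al=True)   # 10 highest-certainty indices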
class Agent:
    # name should contain only letters, digits, and underscores (not enforced by environment)
    __name = 'Based_Agent'

    def __init__(self, stateDim, actionDim, agentParams):
        self.__stateDim = stateDim
        self.__actionDim = actionDim
        self.__action = np.random.random(actionDim)
        self.__step = 0
        self.__alpha = 0.001
        self.__gamma = 0.9
        self.__decision_every = 6
        self.__explore_probability = 0.2
        self.__max_replay_samples = 20
        self.__features = Features()
        self.__previous_action = None
        self.__current_out = None
        self.__previous_out = None
        self.__previous_meta_state = None
        self.__previous_state = None
        self.__test = agentParams[0] if agentParams else None
        self.__exploit = False
        self.__segments = 2
        self.__actions = 3**self.__segments
        try:
            self.__net = load_model('net')
        except Exception:
            print('Creating new model')
            self.__net = Sequential([
                Dense(50, activation='elu', input_dim=self.__features.dim),
                Dense(30, activation='elu'),
                Dense(self.__actions),
                Reshape((self.__actions, 1))
            ])
            self.__net.compile(optimizer=SGD(lr=self.__alpha),
                               loss='mean_squared_error',
                               sample_weight_mode='temporal')
        try:
            self.__replay = Replay.load('replay')
        except Exception:
            self.__replay = Replay(self.__actions)
        self.__replay_X = []
        self.__replay_Y = []

    def start(self, state):
        self.__previous_state = state
        self.__choose_action(state)
        self.__previous_out = self.__current_out
        return self.__action

    def step(self, reward, state):
        self.__previous_state = state
        self.__step += 1
        if self.__step % self.__decision_every != 0:
            return self.__action
        self.__choose_action(state)
        if not self.__exploit:
            max_q = self.__current_out[np.argmax(self.__current_out)]
            self.__update_q(reward - self.__features.min_dist(state) / 100, max_q)
        self.__previous_out = self.__current_out
        return self.__action

    def end(self, reward):
        if not self.__exploit:
            self.__update_q(reward, reward)
            self.__replay.submit(self.__test, (self.__replay_X, self.__replay_Y), self.__step)
            self.__net.save('net')
            self.__replay.save('replay')

    def cleanup(self):
        pass

    def getName(self):
        return self.__name

    def __choose_action(self, state):
        meta_state = np.asarray(self.__features.get_features(state),
                                dtype='float').reshape((1, self.__features.dim))
        out = self.__net.predict_proba([meta_state], batch_size=1)[0].flatten()
        self.__current_out = out
        if self.__exploit or self.__explore_probability < np.random.random():
            # take best action
            action = np.argmax(out)
        else:
            # take random action
            action = np.random.randint(0, self.__actions)
        self.__previous_action = action
        self.__previous_meta_state = meta_state
        self.__meta_to_action(action)

    def __update_q(self, reward, max_q):
        teach_out = self.__previous_out
        teach_out[self.__previous_action] = reward + self.__gamma * max_q
        # sampling from infinite stream (reservoir sampling)
        if len(self.__replay_X) < self.__max_replay_samples:
            self.__replay_X.append(self.__previous_meta_state)
            self.__replay_Y.append((teach_out[self.__previous_action], self.__previous_action))
        elif np.random.random() < self.__max_replay_samples / self.__step:
            to_replace = np.random.randint(0, self.__max_replay_samples)
            self.__replay_X[to_replace] = self.__previous_meta_state
            self.__replay_Y[to_replace] = (teach_out[self.__previous_action], self.__previous_action)
        self.__net.fit([self.__previous_meta_state],
                       [teach_out.reshape(1, self.__actions, 1)],
                       verbose=0)
        replay_x, replay_y, replay_w = self.__replay.get_training()
        if replay_x:
            data = list(zip(replay_x, replay_y, replay_w))
            np.random.shuffle(data)
            for x, y, w in data:
                self.__net.fit([x], [y], sample_weight=[w], verbose=0)

    def __meta_to_action(self, meta):
        self.__action[:] = 0
        for segment in range(self.__segments):
            segment_action = meta % 3
            muscle_start = 30 * segment // self.__segments
            muscle_stop = 30 * (segment + 1) // self.__segments
            if segment_action == 0:
                self.__action[muscle_start:muscle_stop:3] = 1
            if segment_action == 1:
                self.__action[muscle_start + 1:muscle_stop:3] = 1
            if segment_action == 2:
                self.__action[muscle_start + 2:muscle_stop:3] = 1
            meta //= 3
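# A small illustration of the base-3 decoding in __meta_to_action: with
# __segments = 2 there are 3**2 = 9 meta-actions, and each base-3 digit picks
# one of three activation patterns for its block of the 30 muscles.
def decode_meta(meta, segments=2):
    pattern = []
    for _ in range(segments):
        pattern.append(meta % 3)
        meta //= 3
    return pattern

print(decode_meta(5))  # [2, 1]: pattern 2 for the first block, pattern 1 for the second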
fpr_keras, tpr_keras, threshold_keras = roc_curve(y_test, y_test_pred)
auc_keras = auc(fpr_keras, tpr_keras)
print('Testing data AUC:', auc_keras)

# ROC curve for testing data
plt.figure(1)
plt.plot([0, 1], [0, 1], 'k--')
plt.plot(fpr_keras, tpr_keras, label='Keras (area={:.3f})'.format(auc_keras))
plt.xlabel('False Positive rate')
plt.ylabel('True Positive rate')
plt.title('ROC Curve')
plt.legend(loc='best')
plt.show()

# AUC score of training
y_train_pred = model.predict_proba(X_train)
fpr_keras, tpr_keras, threshold_keras = roc_curve(y_train, y_train_pred)
auc_keras = auc(fpr_keras, tpr_keras)
print('Training data AUC:', auc_keras)

# ROC curve of training
plt.figure(1)
plt.plot([0, 1], [0, 1], 'k--')
plt.plot(fpr_keras, tpr_keras, label='Keras (area={:.3f})'.format(auc_keras))
plt.xlabel('False Positive rate')
plt.ylabel('True Positive rate')
plt.title('ROC Curve')
plt.legend(loc='best')
plt.show()

# make y_train categorical and assign it to y_train_cat
if float(conf_mat[0, 0] + conf_mat[0, 1]) != 0:
    precision = float(conf_mat[0, 0]) / float(conf_mat[0, 0] + conf_mat[0, 1])

# f1 score
f1_score = 2 * (float(precision * recall) / float(precision + recall))

print("confusion matrix")
print("----------------------------------------------")
print("accuracy")
print("%.6f" % accuracy)
print("recall")
print("%.6f" % recall)
print("precision")
print("%.6f" % precision)
print("f1score")
print("%.6f" % f1_score)

y_pred_proba = model.predict_proba(xtest)[::, 1]
fpr, tpr, _ = metrics.roc_curve(y_true, y_pred_proba)
auc = metrics.roc_auc_score(y_true, y_pred_proba)
plt.plot(fpr, tpr, label="data 1, auc=" + str(auc))
plt.legend(loc=4)
plt.show()

##############################################################################
# Get training and validation accuracy histories
training_loss = history.history['accuracy']
test_loss = history.history['val_accuracy']
# Create count of the number of epochs
epoch_count = range(1, len(training_loss) + 1)
class Learning:
    def machine_learning(self, x_train, y_train, x_test, smallest_class, clf=None):
        # smallest class -> 1/0
        if clf is None:
            # n_estimators - number of trees
            # balanced - weight classes inversely proportional to their frequency
            clf = RandomForestClassifier(n_estimators=200, class_weight="balanced")
        clf.fit(np.asmatrix(x_train, dtype=np.float32), y_train)
        probs = clf.predict_proba(x_test)
        # probs closer to 0 -> 0
        return probs.argmax(0)[smallest_class]  # smallest class black

    # TODO: implementation using keras
    def deep_learning(self, x_train, y_train, x_test, smallest_class):
        from keras import Sequential
        from keras.callbacks import EarlyStopping
        from keras.layers import Dense, Dropout
        from keras.regularizers import l1_l2

        # stop if there is no improvement
        early_stopping = EarlyStopping(monitor='val_loss',
                                       min_delta=0,
                                       patience=50,
                                       mode='min',
                                       verbose=1)
        self.classifier = Sequential()
        # he_normal - initialize weights from a truncated normal distribution
        self.classifier.add(Dense(300, kernel_initializer="he_normal",
                                  activation="relu", input_dim=x_train.shape[1]))
        self.classifier.add(Dropout(0.5))
        self.classifier.add(Dense(100, kernel_initializer='he_normal',
                                  activation='relu', kernel_regularizer=l1_l2(0.5)))
        self.classifier.add(Dropout(0.5))
        self.classifier.add(Dense(20, kernel_initializer='he_normal',
                                  activation='relu', kernel_regularizer=l1_l2(0.5)))
        self.classifier.add(Dropout(0.5))
        self.classifier.add(Dense(1, kernel_initializer='uniform',
                                  activation="sigmoid", kernel_regularizer=l1_l2(0.1)))
        self.classifier.compile(loss='binary_crossentropy',
                                optimizer='adam',
                                metrics=['accuracy'])
        self.classifier.fit(x_train, y_train,
                            validation_split=0.1,
                            callbacks=[early_stopping],
                            epochs=10,
                            batch_size=520,
                            verbose=0)
        probs = self.classifier.predict_proba(x_test)
        return probs.argmax()
# Create NN model
model = Sequential()
model.add(Dense(2, input_dim=2, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer=SGD(lr=0.1))
print(model.summary())

# Training
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])
model.fit(X, y, batch_size=1, epochs=1000, verbose=0)

# Result
print("Network test:")
print("XOR(0,0):", model.predict_proba(np.array([[0, 0]])))
print("XOR(0,1):", model.predict_proba(np.array([[0, 1]])))
print("XOR(1,0):", model.predict_proba(np.array([[1, 0]])))
print("XOR(1,1):", model.predict_proba(np.array([[1, 1]])))

# Parameters layer 1
W1 = model.get_weights()[0]
b1 = model.get_weights()[1]
# Parameters layer 2
W2 = model.get_weights()[2]
b2 = model.get_weights()[3]
print("W1:", W1)
print("b1:", b1)
print("W2:", W2)
print("b2:", b2)
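# A quick numpy-only check that the extracted weights reproduce the network:
# the same Dense(2, relu) -> Dense(1, sigmoid) forward pass computed by hand.
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def manual_xor(x):
    h = np.maximum(0.0, x @ W1 + b1)  # hidden layer: Dense(2, relu)
    return sigmoid(h @ W2 + b2)       # output layer: Dense(1, sigmoid)

print("manual XOR(1,0):", manual_xor(np.array([[1, 0]])))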
y_predicted = model.predict(x_test)
# print(y_predicted.shape)
for i in range(test_instances):
    index = y_predicted[i].argmax()
    y_predicted[i] = [0, 0]
    y_predicted[i, index] = 1

accuracy = accuracy_score(y_test_matrix, y_predicted)
print("accuracy = ", accuracy)
print('Balanced accuracy: ',
      balanced_accuracy_score(y_test_matrix.argmax(axis=1), y_predicted.argmax(axis=1)))
print('f measure = ',
      f1_score(y_test_matrix.argmax(axis=1), y_predicted.argmax(axis=1)))
conf_matrix = confusion_matrix(y_test_matrix.argmax(axis=1), y_predicted.argmax(axis=1))
print("Confusion matrix : \n", conf_matrix)
skplt.metrics.plot_roc(y_test, model.predict_proba(x_test),
                       plot_macro=False, plot_micro=False, classes_to_plot=[1])
plt.show()
"""
fpr, tpr, thresholds = roc_curve(y_test_matrix.argmax(axis=1), y_predicted.argmax(axis=1))
plt.figure(2)
plt.plot(fpr, tpr)
plt.show()
"""
if float(conf_mat[0, 0] + conf_mat[0, 1]) != 0:
    precision = float(conf_mat[0, 0]) / float(conf_mat[0, 0] + conf_mat[0, 1])

# f1 score
f1_score = 2 * (float(precision * recall) / float(precision + recall))

print("confusion matrix")
print("----------------------------------------------")
print("accuracy")
print("%.3f" % accuracy)
print("recall")
print("%.3f" % recall)
print("precision")
print("%.3f" % precision)
print("f1score")
print("%.3f" % f1_score)

y_pred_proba = classifier.predict_proba(xtest)[::, 1]
fpr, tpr, _ = metrics.roc_curve(y_true, y_pred_proba)
auc = metrics.roc_auc_score(y_true, y_pred_proba)
plt.plot(fpr, tpr, label="data 1, auc=" + str(auc))
plt.legend(loc=4)
plt.show()

plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()

print("runtime:" + str(datetime.now() - start))
# print("runtime:" + str(time.time()-start))
end1 = time.perf_counter()  # time.clock() was removed in Python 3.8
t1 = end1 - start1
model.save('my_model3.h5')

# record the test time
start2 = time.perf_counter()
loss, accuracy = model.evaluate(x_test, y_test)
end2 = time.perf_counter()
t2 = end2 - start2

# computation of evaluation metrics
pre_y = model.predict_classes(x_test)
y_test = np.array(y_test)
metrics = classification_report(y_test, pre_y, digits=4)
print(metrics)
confusion_m = confusion_matrix(y_test, pre_y)
y_pred_pro = model.predict_proba(x_test)[:, 0]
fpr, tpr, thresholds = roc_curve(y_test, y_pred_pro, pos_label=1)
roc_auc = auc(fpr, tpr)
mat_plt(history)
plot_confusion_matrix(confusion_m)
roc(fpr, tpr, roc_auc)
model.save('my_model3.h5')

# compute fpr and tpr
def fpr_tpr(confusion_m):
    sum = 0
    count = 0
    k = 0
    lubao = []
    for i in confusion_m:
        for j in i:
            sum = sum + j
print(train_labels[:10])
NUM_DIGITS = 10
trainLabels = utils.to_categorical(train_labels, NUM_DIGITS)
testLabels = utils.to_categorical(test_labels, NUM_DIGITS)

model = Sequential()
model.add(Dense(units=128, activation=tf.nn.relu, input_shape=(FLATTEN_DIM, )))
model.add(Dense(units=64, activation=tf.nn.relu))
model.add(Dense(units=10, activation=tf.nn.softmax))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
print(model.summary())

cb2 = TensorBoard(log_dir="logs/demo71", histogram_freq=0, write_graph=True, write_images=True)
model.fit(trainImages, trainLabels,
          epochs=100,
          validation_data=(testImages, testLabels),
          callbacks=[cb1, cb2])

predictedLabels = model.predict_classes(testImages)
print("result:", predictedLabels[:10])
predictedProbs = model.predict_proba(testImages)
print("result:", predictedProbs[:10])
predicted = model.predict(testImages)
print('result:', predicted[:10])

loss, accuracy = model.evaluate(testImages, testLabels)
print("test accuracy:%.4f" % accuracy)
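# predict_classes/predict_proba were removed from Sequential in TF 2.6; a
# hedged equivalent with plain predict() (assuming numpy is imported as np):
probs = model.predict(testImages)   # what predict_proba returned here
labels = np.argmax(probs, axis=1)   # what predict_classes returned here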
# cp_callback = keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
#                                               save_weights_only=True,
#                                               verbose=1)

## Fit model for multiple labels and print accuracy
## 2296*1130 = 2594485-5
# history = model.fit(X_train, Y_train, validation_split=0.3, batch_size=10000, epochs=50, callbacks=[cp_callback])
history = model.fit(X_train, Y_train,
                    validation_split=0.3,
                    batch_size=10000,
                    epochs=50,
                    verbose=2)

pred = model.predict(X_test, verbose=1)
pred_proba = model.predict_proba(X_test)
pred[pred >= 0.5] = 1
pred[pred < 0.5] = 0
# print('pred: ', pred)
# print('Y_test: ', Y_test)
conf_mat = multilabel_confusion_matrix(Y_test, pred)
# print('conf mat: ')
# print(conf_mat)

# summarize history for accuracy
ExtraSensoryHelperFunctions.PlotEpochVsAcc(plt, history)
# summarize history for loss
ExtraSensoryHelperFunctions.PlotEpochVsLoss(plt, history)
class SafetyModelByCnnRandomForestStack(SafetyModel):
    MODEL_TYPE = 'safety-cnn-rf-v0'
    CNN_FEATURES = [
        'acceleration_x', 'acceleration_y', 'acceleration_z',
        'acceleration_gravity_diff_magnitude', 'Bearing', 'gyro_x_filtered',
        'gyro_y_filtered', 'gyro_z_filtered', 'gyro_filtered_magnitude',
        'Speed', 'Accuracy', 'second', 'second_diff', 'orientation_theta',
        'orientation_psi', 'orientation_phi'
    ]
    SEQUENCE_MAX_LEN = 200

    def __init__(self):
        super(SafetyModelByCnnRandomForestStack, self).__init__(self.MODEL_TYPE)
        self._model_first = None
        self._model_second = None
        self._features = None

    def build(self, data: pd.DataFrame, label: pd.DataFrame):
        print('Preprocess data ...')
        train_label = self.preprocess_label(label)
        train_dataset_prep = SafetyModel._preprocess(data)
        print('Aggregate data ...')
        train_agg_data = self._aggregate_data(train_dataset_prep)
        print('Preprocess data - To CNN input format ...')
        train_dataset_cnn, train_booking_ids = self._to_cnn_dataset(train_dataset_prep)
        del train_dataset_prep

        # First Step: CNN Model
        cnn_model = self._create_model_cnn(train_dataset_cnn)
        cnn_model.compile(loss='binary_crossentropy',
                          optimizer='adam',
                          metrics=['binary_accuracy'])
        train_label_cnn = pd.merge(train_booking_ids, train_label, on='bookingID').label
        callbacks_list = [EarlyStopping(monitor='binary_accuracy', patience=3)]
        print('Building CNN model ...')
        history = cnn_model.fit(train_dataset_cnn,
                                np.array(train_label_cnn).reshape((-1, 1)),
                                batch_size=4,
                                epochs=50,
                                callbacks=callbacks_list,
                                validation_split=0.2,
                                verbose=1)
        print('Done learning CNN model.')
        self._model_first = Sequential()
        for layer in cnn_model.layers[:-1]:
            self._model_first.add(layer)
        for layer in self._model_first.layers:
            layer.trainable = False

        # Second Step: Stacking Random Forest
        train_cnn_embed = self._model_first.predict_proba(train_dataset_cnn)
        agg_features = train_agg_data.columns[
            train_agg_data.columns.str.contains("max|std|ratio")]
        train_data_stack = pd.concat([
            pd.Series(train_booking_ids.bookingID),
            train_agg_data[agg_features],
            pd.DataFrame(train_cnn_embed,
                         columns=['cnn_result_' + str(i)
                                  for i in range(len(train_cnn_embed[0]))])
        ], axis=1)
        train_data_stack = pd.merge(train_data_stack, train_label, on='bookingID')
        self._features = train_data_stack.columns[train_data_stack.columns != 'label']
        self._model_second = RandomForestClassifier(n_estimators=200,
                                                    random_state=0,
                                                    min_samples_leaf=75)
        self._model_second.fit(train_data_stack[self._features], train_data_stack.label)

    def save(self, path: str):
        obj = {
            'model_type': self._model_type,
            'features': self._features,
            'model_first': self._model_first,
            'model_second': self._model_second
        }
        joblib.dump(obj, path, protocol=2)

    def load(self, path: str):
        obj = joblib.load(path)
        if obj['model_type'] != self.MODEL_TYPE:
            raise ValueError('Incompatible type to load. Expect {} but get {}'.format(
                self.MODEL_TYPE, obj['model_type']))
        self._features = obj['features']
        self._model_first = obj['model_first']
        self._model_second = obj['model_second']

    def predict(self, data: pd.DataFrame) -> pd.DataFrame:
        if (self._model_first is None) or (self._model_second is None):
            raise AttributeError('Model is not available. Build or load the model beforehand.')
        print('Preprocess data ...')
        test_dataset_prep = SafetyModel._preprocess(data)
        print('Aggregate data ...')
        test_agg_data = self._aggregate_data(test_dataset_prep)
        print('Preprocess data - To CNN input format ...')
        test_dataset_cnn, test_booking_ids = self._to_cnn_dataset(test_dataset_prep)
        del test_dataset_prep

        # First Step: CNN Model
        test_cnn_embed = self._model_first.predict_proba(test_dataset_cnn)

        # Second Step: Stacking Random Forest
        agg_features = test_agg_data.columns[
            test_agg_data.columns.str.contains("max|std|ratio")]
        test_data_stack = pd.concat([
            pd.Series(test_booking_ids.bookingID),
            test_agg_data[agg_features],
            pd.DataFrame(test_cnn_embed,
                         columns=['cnn_result_' + str(i)
                                  for i in range(len(test_cnn_embed[0]))])
        ], axis=1)
        prediction = self._model_second.predict_proba(test_data_stack[self._features])
        prediction = prediction[:, np.argwhere(self._model_second.classes_ == 1)[0][0]]
        prediction_df = pd.DataFrame(data={
            'bookingID': test_data_stack.bookingID,
            'prediction': prediction
        })
        return prediction_df

    @staticmethod
    def _create_model_cnn(dataset):
        num_seq = len(dataset[0])
        num_features = len(dataset[0][0])
        inpt = Input(shape=(num_seq, num_features))
        convs = []
        conv1 = Conv1D(8, 1, activation='relu')(inpt)
        pool1 = GlobalMaxPooling1D()(conv1)
        convs.append(pool1)
        conv2 = Conv1D(8, 3, activation='relu')(inpt)
        pool2_1 = AveragePooling1D(pool_size=5)(conv2)
        conv2_1 = Conv1D(16, 3, activation='relu')(pool2_1)
        pool2_2 = GlobalMaxPooling1D()(conv2_1)
        convs.append(pool2_2)
        out = Concatenate()(convs)
        first_segment_model = Model(inputs=[inpt], outputs=[out])
        model = Sequential()
        model.add(first_segment_model)
        model.add(Dropout(0.2))
        model.add(Dense(16, activation='sigmoid'))
        model.add(Dense(1, activation='sigmoid'))
        print(first_segment_model.summary())
        print(model.summary())
        return model

    def _to_cnn_dataset(self, preprocessed_dataset: pd.DataFrame) -> (list, pd.DataFrame):
        data_cnn = preprocessed_dataset.copy()
        data_cnn[['acceleration_x', 'acceleration_y', 'acceleration_z']] = \
            data_cnn[['acceleration_x', 'acceleration_y', 'acceleration_z']] / 10.0
        data_cnn[['gyro_x_filtered', 'gyro_y_filtered', 'gyro_z_filtered']] = \
            data_cnn[['gyro_x_filtered', 'gyro_y_filtered', 'gyro_z_filtered']]
        data_cnn['Bearing'] = data_cnn['Bearing'] / 360.0
        data_cnn[['orientation_theta', 'orientation_psi', 'orientation_phi']] = \
            data_cnn[['orientation_theta', 'orientation_psi', 'orientation_phi']] / 180.0
        data_cnn['Speed'] = data_cnn['Speed'] / 35.0
        data_cnn['second'] = data_cnn['second'] / 1750.0
        data_cnn['second_diff'] = data_cnn['second_diff'] / 30.0
        data_cnn['Accuracy'] = data_cnn['Accuracy'] / 15.0
        data_cnn, booking_ids = self._to_keras_input(data_cnn, self.CNN_FEATURES,
                                                     self.SEQUENCE_MAX_LEN)
        return data_cnn, booking_ids
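# A hedged usage sketch of the two-stage stack (the telemetry and label
# DataFrames are assumed to follow the column conventions above, keyed by
# bookingID; the file name is hypothetical).
stack = SafetyModelByCnnRandomForestStack()
stack.build(telemetry_df, label_df)   # CNN embedding -> random forest
stack.save('safety_cnn_rf.joblib')
scores = stack.predict(new_trips_df)  # DataFrame of bookingID + probability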
def fitting(self):
    dim_row = self.lags  # time
    dim_col = 1  # features or channels (Volume)
    output_dim = 3  # 3 for categorical

    # data = np.random.random((1000, dim_row, dim_col))
    # clas = np.random.randint(3, size=(1000, 1))
    # print(clas)
    # clas = to_categorical(clas)
    # print(clas)

    data = self.X_train
    data_test = self.X_test
    data = data.values.reshape(-1, dim_row, dim_col)
    data_test = data_test.values.reshape(-1, dim_row, dim_col)

    clas = self.y_train
    clas_test = self.y_test
    clas = to_categorical(clas)
    clas_test = to_categorical(clas_test)

    cat0 = self.y_train.tolist().count(0)
    cat1 = self.y_train.tolist().count(1)
    cat2 = self.y_train.tolist().count(2)
    print("major: ", cat1, " ", "minor: ", cat2, " ", "neutral: ", cat0)

    n_samples_0 = cat0
    n_samples_1 = (cat1 + cat2) / 2.0
    n_samples_2 = (cat1 + cat2) / 2.0
    class_weight = {
        0: 1.0,
        1: n_samples_0 / n_samples_1,
        2: n_samples_0 / n_samples_2
    }

    def class_1_accuracy(y_true, y_pred):
        # taken from: http://www.deepideas.net/unbalanced-classes-machine-learning/
        class_id_true = K.argmax(y_true, axis=-1)
        class_id_preds = K.argmax(y_pred, axis=-1)
        accuracy_mask = K.cast(K.equal(class_id_preds, 1), 'int32')
        class_acc_tensor = K.cast(K.equal(class_id_true, class_id_preds), 'int32') * accuracy_mask
        class_acc = K.sum(class_acc_tensor) / K.maximum(K.sum(accuracy_mask), 1)
        return class_acc

    class SecondOpinion(Callback):
        def __init__(self, model, x_test, y_test, N):
            self.model = model
            self.x_test = x_test
            self.y_test = y_test
            self.N = N
            self.epoch = 1

        def on_epoch_end(self, epoch, logs={}):
            if self.epoch % self.N == 0:
                y_pred = self.model.predict(self.x_test)
                pred_T = 0
                pred_F = 0
                for i in range(len(y_pred)):
                    if np.argmax(y_pred[i]) == 1 and np.argmax(self.y_test[i]) == 1:
                        pred_T += 1
                    if np.argmax(y_pred[i]) == 1 and np.argmax(self.y_test[i]) != 1:
                        pred_F += 1
                if pred_T + pred_F > 0:
                    Pr_pos = pred_T / (pred_T + pred_F)
                    print("Yoe: epoch, positive probability: ", self.epoch, Pr_pos)
                else:
                    print("Yoe positive probability: 0")
            self.epoch += 1

    #################################################################################################################
    model = Sequential()
    # model.add(Reshape(input_shape=(dim_row, dim_col), target_shape=(dim_row, dim_col, 1)))
    if self.nConv > 0:
        # model.add(Reshape((dim_row, dim_col, 1)))
        model.add(Reshape(input_shape=(dim_row, dim_col), target_shape=(dim_row, dim_col, 1)))
        for i in range(self.nConv):
            model.add(Convolution2D(self.conv_nodes,
                                    kernel_size=(self.kernel_size, 1),
                                    padding='same',
                                    kernel_regularizer=regularizers.l2(0.01)))
            model.add(Activation('relu'))
        model.add(Reshape(target_shape=(dim_row, self.conv_nodes * dim_col)))
    # Since our output has a single dimension, "return_sequences='True'" is not
    # necessary, nor is it necessary to use TimeDistributed
    if self.nConv == 0:
        model.add(LSTM(units=self.lstm_nodes, return_sequences=True,
                       activation='tanh', input_shape=(dim_row, dim_col)))
    for i in range(self.nLSTM - 1):
        model.add(LSTM(units=self.lstm_nodes, return_sequences=True, activation='tanh'))
    model.add(Dropout(0.5))
    model.add(TimeDistributed(Dense(units=output_dim)))  # the dimension at index one will be considered the temporal dimension
    model.add(Activation('softmax'))  # for loss = 'categorical_crossentropy'
    # model.add(Activation('sigmoid'))  # for loss = 'binary_crossentropy'

    # With x: x[:, -1, :] the second dimension disappears, keeping only the
    # LAST elements (-1) of that dimension.
    # Try this to see:
    #   data = np.random.random((5, 3, 4))
    #   print(data)
    #   print(data[:, -1, :])
    model.add(Lambda(lambda x: x[:, -1, :], output_shape=[output_dim]))
    print(model.summary())

    tensorboard_active = False
    val_loss = False
    second_opinion = True
    callbacks = []
    if tensorboard_active:
        callbacks.append(TensorBoard(log_dir=self.putmodel + "Tensor_board_data",
                                     histogram_freq=0,
                                     write_graph=True,
                                     write_images=True))
    if val_loss:
        callbacks.append(EarlyStopping(monitor='val_loss', patience=5))
    if second_opinion:
        callbacks.append(SecondOpinion(model, data_test, clas_test, 10))

    # model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['categorical_accuracy'])
    # model.compile(loss='binary_crossentropy', optimizer=Adam(lr=self.learning), metrics=['categorical_accuracy'])
    model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=[class_1_accuracy])
    model.fit(x=data, y=clas,
              batch_size=self.batch_size,
              epochs=800,
              verbose=2,
              callbacks=callbacks,
              class_weight=class_weight)
    # validation_data=(data_test, clas_test))

    #####################################################################################################################
    # serialize model to YAML
    model_yaml = model.to_yaml()
    with open("model.yaml", "w") as yaml_file:
        yaml_file.write(model_yaml)
    # serialize weights to HDF5
    model.save_weights("model.h5")
    print("Saved model to disk")

    # # load YAML and create model
    # yaml_file = open('model.yaml', 'r')
    # loaded_model_yaml = yaml_file.read()
    # yaml_file.close()
    # loaded_model = model_from_yaml(loaded_model_yaml)
    # # load weights into new model
    # loaded_model.load_weights("model.h5")
    # print("Loaded model from disk")
    # loaded_model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=[class_1_accuracy])

    print("Computing prediction ...")
    y_pred = model.predict_proba(data_test)
    model.reset_states()

    print("Computing train evaluation ...")
    score_train = model.evaluate(data, clas, verbose=2)
    print('Train loss:', score_train[0])
    print('Train accuracy:', score_train[1])
    model.reset_states()
    # score_train_loaded = loaded_model.evaluate(data, clas, verbose=2)
    # loaded_model.reset_states()
    # print('Train loss loaded:', score_train[0])
    # print('Train accuracy loaded:', score_train[1])

    print("Computing test evaluation ...")
    score_test = model.evaluate(data_test, clas_test, verbose=2)
    print('Test loss:', score_test[0])
    print('Test accuracy:', score_test[1])
    model.reset_states()
    # score_test_loaded = loaded_model.evaluate(data_test, clas_test, verbose=2)
    # loaded_model.reset_states()
    # print('Test loss loaded:', score_test[0])
    # print('Test accuracy loaded:', score_test[1])

    pred_T = 0
    pred_F = 0
    for i in range(len(y_pred)):
        if np.argmax(y_pred[i]) == 1 and np.argmax(clas_test[i]) == 1:
            pred_T += 1
            # print(y_pred[i])
        if np.argmax(y_pred[i]) == 1 and np.argmax(clas_test[i]) != 1:
            pred_F += 1
    if pred_T + pred_F > 0:
        Pr_pos = pred_T / (pred_T + pred_F)
        print("Yoe positive probability: ", Pr_pos)
    else:
        print("Yoe positive probability: 0")

    history = DataFrame([[self.skip, self.nConv, self.nLSTM, self.learning,
                          self.batch_size, self.conv_nodes, self.lstm_nodes,
                          score_train[0], score_train[1],
                          score_test[0], score_test[1]]],
                        columns=('Skip', 'cConv', 'nLSTM', 'learning',
                                 'batch_size', 'conv_nodes', 'lstm_nodes',
                                 'loss_train', 'acc_train',
                                 'loss_test', 'acc_test'))
    self.history = self.history.append(history)
classifier.add(Dense(1, activation='sigmoid'))

# Compiling the NN
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Fit the data
classifier.fit(x_train, y_train, batch_size=10, epochs=10)

# Predicting
y_pred = classifier.predict(x_test)
y_pred = np.round(y_pred)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

# Probability estimate of all classes (two classes) for the test data:
y_pred_prob = classifier.predict_proba(X_ts)

# CSV results
from pandas import DataFrame
column = ['Probability of belonging to class 1 - NN']
dic = dict(zip(column, [y_pred_prob.tolist()]))
df = DataFrame(dic)
export_csv = df.to_csv('NN.csv', columns=column)