Example 1
def recognize_cnn(face,
                  model_name,
                  filepath='fitted_models/',
                  ext='',
                  return_name=True):

    people = pickle.load(open(filepath + 'ids_' + model_name + '.sav', 'rb'))

    X = face / 255
    X = X.reshape(1, 100, 100, 3)

    model = Sequential()
    model.add(
        Conv2D(32,
               kernel_size=(3, 3),
               activation='relu',
               input_shape=(100, 100, 3)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.15))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.35))
    model.add(Dense(len(people), activation='sigmoid'))
    model.load_weights(filepath + model_name + ext)
    if return_name:
        predictions = model.predict_proba(X)[0]
        return people[np.where(predictions == max(predictions))[0][0]]
    else:
        return model.predict(X)
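
A minimal usage sketch for recognize_cnn, assuming OpenCV is available and that the pickled id list and the weight file exist under fitted_models/ (the image path, model name, and extension below are hypothetical):

import cv2

img = cv2.imread('person.jpg')                       # hypothetical 3-channel input image
face = cv2.resize(img, (100, 100))                   # the function expects a 100x100x3 array
print(recognize_cnn(face, 'face_cnn', ext='.h5'))    # prints the predicted person's id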
Example 2
class SimpleNN(BaseModel):
    def train(self, scale=True):
        self.scaler.fit(self.x)
        self.x = self.scaler.transform(self.x)

        x_train, x_test, y_train, y_test = train_test_split(self.x,
                                                            self.y,
                                                            shuffle=True)
        callbacks = [
            EarlyStopping(monitor='val_loss', restore_best_weights=True)
        ]
        self.model = Sequential()
        self.model.add(
            Dense(500, activation='relu', input_dim=x_train.shape[1]))
        self.model.add(Dropout(0.2))
        self.model.add(Dense(500, activation='relu'))
        self.model.add(Dense(1, activation='sigmoid'))
        self.model.compile(loss=keras.losses.binary_crossentropy,
                           optimizer=Adam(decay=0.001, amsgrad=True),
                           metrics=['acc'])
        self.model.fit(x_train,
                       y_train,
                       validation_data=(x_test, y_test),
                       epochs=50000,
                       verbose=1,
                       callbacks=callbacks,
                       batch_size=256)

    def predict(self, test):
        test = self.scaler.transform(test)
        return self.model.predict_proba(test)
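
A usage sketch, assuming BaseModel (not shown) stores the feature matrix in self.x, the binary targets in self.y, and an unfitted scaler such as sklearn's StandardScaler in self.scaler; the constructor arguments below are assumptions:

nn = SimpleNN(x=features, y=labels)       # hypothetical constructor provided by BaseModel
nn.train()                                # scales, splits, builds and fits the network
probabilities = nn.predict(new_samples)   # applies the fitted scaler, returns class probabilities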
Example 3
class F22Cnn():
    def fit(self,
            session_dir=None,
            X_train=None,
            y_train=None,
            X_test=None,
            y_test=None,
            epochs=0,
            batch_size=0):
        mean_px = X_train.mean().astype(np.float32)
        std_px = X_train.std().astype(np.float32)

        def standardize(x):
            return (x - mean_px) / std_px

        num_classes = len(set(y_train))
        print('num_classes: {}'.format(num_classes))

        self.clf = Sequential([
            Lambda(standardize, input_shape=(28, 28, 1)),
            Convolution2D(32, (3, 3), activation='relu'),
            BatchNormalization(axis=1),
            Convolution2D(32, (3, 3), activation='relu'),
            MaxPooling2D(),
            BatchNormalization(axis=1),
            Convolution2D(64, (3, 3), activation='relu'),
            BatchNormalization(axis=1),
            Convolution2D(64, (3, 3), activation='relu'),
            MaxPooling2D(),
            Flatten(),
            BatchNormalization(axis=1),
            Dense(512, activation='relu'),
            Dense(10, activation='softmax'),
        ])

        self.clf.compile(Adam(),
                         loss='categorical_crossentropy',
                         metrics=['accuracy'])

        self.batch_size = batch_size

        self.clf, history = fit_cnn(self.clf, X_train, y_train, X_test, y_test,
                                    epochs, session_dir, self.batch_size)

        return history

    def score(self, X, y):
        y_cat = to_categorical(y)
        logging.info("Metric names: {}".format(self.clf.metrics_names))
        return self.clf.evaluate(X, y_cat, batch_size=self.batch_size)[1]

    def predict_proba(self, X):
        return self.clf.predict_proba(X)
Example 4
def neural_network(x_tr, y_tr, x_te, y_te, dum=False, min_max=False):
    start = time.perf_counter()  # time.clock() was removed in Python 3.8
    if dum:
        x_tr = data_std(x_tr, min_max=False)
        x_te = data_std(x_te, min_max=False)
    y_tr_dm = data_propr(y_tr, name=False)
    y_te_dm = data_propr(y_te, name=False)
    init = initializers.glorot_uniform(seed=1)
    simple_adam = optimizers.Adam()
    model = Sequential()
    model.add(
        Dense(units=5,
              input_dim=x_te.shape[1],
              kernel_initializer=init,
              activation='relu'))
    model.add(Dropout(1))
    model.add(Dense(units=6, kernel_initializer=init, activation='sigmoid'))
    model.add(Dropout(1))
    model.add(Dense(units=5, kernel_initializer=init, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer=simple_adam,
                  metrics=['accuracy'])
    model.fit(x_tr,
              y_tr_dm,
              verbose=0,
              class_weight={
                  0: 0.05,
                  1: 0.05,
                  2: 0.49,
                  3: 0.499,
                  4: 0.01
              })
    #Return the model's basic evaluation outputs
    NN_class_repot = classification_report(y_te, model.predict_classes(x_te))
    NN_class_con = confusion_matrix(y_te, model.predict_classes(x_te))
    NN_class_pred = model.predict_classes(x_te)
    NN_class_pred_prob = model.predict_proba(x_te)
    #Print the accuracy
    print('Neural network time cost:', end='--')
    print(model.evaluate(x_te, y_te_dm))
    #Simple plot of the model's ROC curve
    poc_plt(y_te_dm, NN_class_pred_prob)
    end = time.perf_counter()
    #Compute the model's time cost
    print('Neural network time cost: %f' % (end - start))
    return NN_class_repot, NN_class_con, NN_class_pred, NN_class_pred_prob, model
Example 5
def recognize_lh(face,
                 model_name,
                 filepath='fitted_models/',
                 ext='',
                 return_name=True):

    people = pickle.load(open(filepath + 'ids_' + model_name + '.sav', 'rb'))

    X = face / 255
    X = X.reshape(1, 100, 100, 3)

    model = Sequential()
    # First convolutional layer, note the specification of shape
    model.add(
        Conv2D(96,
               kernel_size=(7, 7),
               activation='relu',
               input_shape=(100, 100, 3)))
    model.add(MaxPooling2D(pool_size=(3, 3)))
    model.add(Conv2D(256, (5, 5), activation='relu'))
    model.add(MaxPooling2D(pool_size=(3, 3)))
    model.add(Conv2D(384, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(3, 3)))
    #model.add(Dropout(0.1))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(len(people), activation='softmax'))

    model.load_weights(filepath + model_name + ext)

    if return_name:
        predictions = model.predict_proba(X)[0]
        return people[np.where(predictions == max(predictions))[0][0]]
    else:
        return model.predict(X)
Example 6
class Model:
    def __init__(self,train_data,maxlen):
        self.train_data=train_data
        self.maxlen=maxlen
        self.embedding_model=None
        self.ml_model=None
        
    def load_data(self):
        """
        Loading the data into a dataframe

        Input
        path: path to the test data(String)

        Output
        train_data: return a pandas Dataframe
        """
        print(self.train_data.head())
    
    #referenced from https://stackoverflow.com/questions/16645799/how-to-create-a-word-cloud-from-a-corpus-in-python
    def show_wordcloud(self, title = None):
        """
        depicting wordclouds of the input data

        Input
        data: input pandas Dataframe
        """
        stopwords = set(STOPWORDS)
        wordcloud = WordCloud(
            background_color='white',
            stopwords=stopwords,
            max_words=200,
            max_font_size=40, 
            scale=3,
            random_state=1 # chosen at random by flipping a coin; it was heads
        ).generate(str(self.train_data))

        fig = plt.figure(1, figsize=(12, 12))
        plt.axis('off')
        if title: 
            fig.suptitle(title, fontsize=20)
            fig.subplots_adjust(top=2.3)

        plt.imshow(wordcloud)
        plt.show()

            
    
    def transform_data(self):
        """
        Factorizing the simplified lithologies into numerical equivalents

        Input
        data: input pandas dataframe

        Output
        tuple containing the transformed data
        """
        self.train_data['Lithology_original']=self.train_data['Lithology_original'].replace(np.nan,'',regex=True)
        self.train_data['Lithology_original'] =self.train_data['Lithology_original'].apply(preprocessor)
        self.train_data['Simplified_lithology']=self.train_data['Simplified_lithology'].replace(np.nan,'Unknown',regex=True)
        self.train_data['Simplified_lithology']=self.train_data['Simplified_lithology'].apply(preprocessor).astype(str)
        self.train_data['Simplified_lithology'],self.label=pd.factorize(self.train_data['Simplified_lithology'])
        self.list_of_descriptions=self.train_data['Lithology_original'].tolist()
        self.list_of_simple_lithology=self.train_data['Simplified_lithology'].tolist()
    
    
    def generate_embeddings(self):
        """
        Generating FastText(vectorized version of each word) model from the vocabulary in the data

        Input
        list_of_descriptions: transformed descriptions
        list_of_simple_lithology: transformed simple lithologies

        Output
        model: Gensim fasttext model

        """
        data=[]
        for x in self.list_of_descriptions:
            temp=[]
            if(isinstance(x,list)):
                for y in x:
                    temp.append(y.lower())
                data.append(temp)
        for x in self.list_of_simple_lithology:
            temp=[]
            if(isinstance(x,list)):
                for y in x:
                    temp.append(y.lower())
                data.append(temp)
            if(isinstance(x,float)):
                print(x)
        self.embedding_model=gensim.models.FastText(data,min_count=1,size=100,window=3)

    
    
    def split_data(self):
        """
        Splitting the data into train and test

        Input
        train_data: Pandas dataframe

        Output
        tuple containing train and test data 
        """
        msk = np.random.rand(len(self.train_data)) < 0.75
        self.train_X = self.train_data.Lithology_original[msk]
        self.test_X = self.train_data.Lithology_original[~msk]
        y=self.train_data['Simplified_lithology']
        self.train_y = y[msk]
        self.test_y=y[~msk]

        

    
    def tokenize_input_data(self):
        """
        Indexing each token in the descriptions

        Input
        train_X: list of input descriptions
        test_X : list of input descriptions

        Output
        Tuple containing indexed versions of the inputs
        """
        self.tokenizer=Tokenizer(num_words=3000)    
        self.tokenizer.fit_on_texts(self.train_X)
        self.train_X=self.tokenizer.texts_to_sequences(self.train_X)
        self.test_X=self.tokenizer.texts_to_sequences(self.test_X)
    
    
    def label_to_id(self):
        """
        Indexing each label in the target(simplified lithology)

        Input
        train_y: list of labels
        test_y: list of labels

        Output
        tuple containing indexed versions of the input
        """
        
        self.train_y=utils.to_categorical(self.train_y.tolist(),11,dtype='int')
        self.test_y=utils.to_categorical(self.test_y.tolist(),11,dtype='int')
    
    
    def pad_sentences(self):
        """
        Adding padding to the descriptions so that each description is of the same length(maxlen)

        Input
        train_X: list of descriptions
        test_X: list of descriptions
        maxlen: int (maximum length of the descriptions)

        Output
        Tuple containing transformed versions of the input
        """
        self.train_X= pad_sequences(self.train_X, padding='post', maxlen=self.maxlen)
        self.test_X= pad_sequences(self.test_X, padding='post', maxlen=self.maxlen)

    
    
    def create_embedding_matrix(self):
        """
        Creating an embedding matrix to be fed into the neural network

        Input
        model: gensim word2vec model

        embedding_matrix: matrix depicting the embeddings
        """
        self.embedding_matrix=np.zeros((len(self.embedding_model.wv.vocab),100))
        for x,_ in self.embedding_model.wv.vocab.items():
            if x in self.tokenizer.word_counts.keys():
                self.embedding_matrix[self.tokenizer.word_index[x]]=np.array(self.embedding_model.wv[x], dtype=np.float32)[:100]

        
    
    
    def define_learning_model(self):
        """
        Describing the deep learning model using Keras

        Input
        model:gensim word2vec model
        embedding_matrix: matrix of embeddings
        maxlen: maximum length of sentences

        Output
        lstm_model: deep learning model
        """
        self.ml_model=Sequential()
        self.ml_model.add(layers.Embedding(len(self.embedding_model.wv.vocab), 100, 
                                   weights=[self.embedding_matrix],
                                   input_length=self.maxlen,
                                   trainable=False))
        self.ml_model.add(layers.LSTM(100))
        #model.add(layers.Dropout(0.3))
        #model.add(layers.LSTM(100,activation='tanh',recurrent_activation='sigmoid'))
        self.ml_model.add(layers.Dropout(0.3))

        #model.add(layers.GlobalAveragePooling1D())
        self.ml_model.add(layers.Dense(11,activation='softmax'))
        #self.ml_model.add(layers.Softmax())
        #model.add(layers.Flatten())
        adam=optimizers.Adam(lr=0.001)
        self.ml_model.compile(optimizer=adam,
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        self.ml_model.summary()
    
    
    def calculate_accuracy(self):
        """
        Calculating the accuracy of the model.

        Input
        train_X: list of descriptions
        train_y: list of labels

        Output:
        history: model after fitting the data

        """
        msk = np.random.rand(len(self.train_X)) < 0.75
        validation_data_X=self.train_X[~msk]
        validation_data_Y=self.train_y[~msk]
        self.history = self.ml_model.fit(self.train_X[msk],self.train_y[msk],
                            epochs=10,
                            verbose=2,
                           validation_data=(validation_data_X,validation_data_Y))
        _, accuracy = self.ml_model.evaluate(self.train_X, self.train_y, verbose=False)
        print("Training Accuracy: {:.4f}".format(accuracy))
        _, accuracy = self.ml_model.evaluate(self.test_X, self.test_y, verbose=False)
        print("Testing Accuracy:  {:.4f}".format(accuracy))


    
    
    #used as reference from https://www.tensorflow.org/tutorials/keras/basic_text_classification
    def plot_loss(self):
        """
        Plot the training and validation loss w.r.t epochs

        Input
        model: deep learning model
        """
        history_dict = self.history.history
        history_dict.keys()
        loss = history_dict['loss']
        val_loss = history_dict['val_loss']

        epochs = range(1, len(loss) + 1)

        # "bo" is for "blue dot"
        plt.plot(epochs, loss, 'bo', label='Training loss')
        # b is for "solid blue line"
        plt.plot(epochs, val_loss, 'b', label='Validation loss')
        plt.title('Training and validation loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()

        plt.show()
        
        
    
    #used as reference from https://www.tensorflow.org/tutorials/keras/basic_text_classification
    def plot_accuracy(self):
        """
        Plot the training and validation accuracy w.r.t epochs

        Input
        model: deep learning model
        """
        plt.clf()   # clear figure
        history_dict = self.history.history
        history_dict.keys()
        acc = history_dict['acc']
        val_acc = history_dict['val_acc']
        epochs = range(1, len(acc) + 1)
        plt.plot(epochs, acc, 'bo', label='Training acc')
        plt.plot(epochs, val_acc, 'b', label='Validation acc')
        plt.title('Training and validation accuracy')
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.legend()

        plt.show()
        
        
    def initialise_model(self):
        """
        
        Develop the model based on the input data
        
        """
        self.load_data()
        self.transform_data()
        self.generate_embeddings()
        self.split_data()
        self.tokenize_input_data()
        self.label_to_id()
        self.pad_sentences()
        self.create_embedding_matrix()
        self.define_learning_model()
        self.calculate_accuracy()
        
    def predict(self,data):
        """
        Predict simplified lithologies for input data
        
        """
        
        data['Description']=data['Description'].replace(np.nan,'',regex=True)
        data['Description']=data['Description'].astype(str)
        predict_X=self.tokenizer.texts_to_sequences(data['Description'])
        
        predict_X=pad_sequences(predict_X,padding='post',maxlen=self.maxlen)
        output=self.ml_model.predict_classes(predict_X)
        simplified_lithology=[]
        for x in output:
            simplified_lithology.append(self.label[x])
        data['Simplified_Lithology']=pd.Series(simplified_lithology)
        data.to_csv('prediction_file.csv',index=False)
        
        

    def predict_certainity(self,data):
        
        data['Description']=data['Description'].replace(np.nan,'',regex=True)
        data['Description']=data['Description'].astype(str)
        predict_X=self.tokenizer.texts_to_sequences(data['Description'])
        
        predict_X=pad_sequences(predict_X,padding='post',maxlen=self.maxlen)
        output=self.ml_model.predict_proba(predict_X)
        
        return output
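
A minimal driver sketch for the Model class above, assuming a training dataframe with 'Lithology_original' and 'Simplified_lithology' columns and a prediction dataframe with a 'Description' column (file names and maxlen are hypothetical):

import pandas as pd

train_df = pd.read_csv('lithology_train.csv')           # hypothetical path
lith_model = Model(train_data=train_df, maxlen=50)       # maxlen is an assumed value
lith_model.initialise_model()                            # full preprocessing + training pipeline
lith_model.plot_loss()
lith_model.predict(pd.read_csv('lithology_new.csv'))     # writes prediction_file.csv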
Example 7
                    verbose=2)
#Evaluate the model on the test data
kayip_orani, dogruluk_orani = sinif.evaluate(Giris_test,
                                             Cikis_test,
                                             verbose=1,
                                             batch_size=10)
print(" ")
print(
    "--------Accuracy and loss of the trained model on the test data----------"
)
print(" ")
print('test loss ', kayip_orani)
print('test accuracy', dogruluk_orani)
print(" ")
#Classify by predicting on the test data
Cikis_tahmin = sinif.predict_proba(Giris_test)
#print(Cikis_tahmin)
Cikis_tahmin = (
    Cikis_tahmin > 0.5
)  #If the predicted output is above 0.5, treat the result as 1 (sick); otherwise as 0 (healthy).
#ROC curve plot function
from sklearn.metrics import roc_curve, auc


def plot_roc(tahmin, Cikis):
    fpr, tpr, _ = roc_curve(Cikis, tahmin)
    roc_auc = auc(fpr, tpr)
    plt.figure()
    plt.plot(fpr, tpr, label='ROC curve (area=%0.2f)' % roc_auc)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
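
A call sketch for plot_roc, assuming the true 0/1 labels are in Cikis_test and that the raw probabilities (computed above, before thresholding) are passed rather than the boolean predictions:

plot_roc(sinif.predict_proba(Giris_test).ravel(), Cikis_test)   # ravel to a 1-D score array
plt.show()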
Example 8
model.load_weights('fitted_models/levi-hassner_006_minloss')

while True:

    _, img = cap.read()

    gs = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    faces = face_cascade.detectMultiScale(gs, 1.1, 6)

    for (x, y, w, h) in faces:
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 5)
        face = img[y:y + h, x:x + w]
        face = cv2.resize(face, (100, 100))
        face = face.reshape(1, 100, 100, 3)

        #identity = recognize_lh(face, 'levi-hassner_006', ext='_minloss')
        predictions = model.predict_proba(face)[0]
        identity = people[np.where(predictions == max(predictions))[0][0]]

        cv2.putText(img, identity, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 2,
                    (0, 255, 0), 5)

    cv2.imshow('img', img)

    k = cv2.waitKey(30) & 0xff

    if k == 27:
        break

cap.release()
Example 9
)
checkpoint = ModelCheckpoint("keras_model.pt",
                             monitor='val_loss',
                             verbose=1,
                             save_best_only=True,
                             mode='auto')

model.fit(trainx,
          trainy,
          epochs=params['nb_epochs'],
          batch_size=32,
          verbose=5,
          callbacks=[checkpoint, esp],
          validation_split=0.2)
model.load_weights("keras_model.pt")
pred_auc = model.predict_proba(testx, batch_size=64, verbose=0)
# accuracy
pred_auc = np.argmax(pred_auc, axis=1)
acc = accuracy_score(testy_org, pred_auc)
# mean squared error
#acc = mean_squared_error(testy, pred_auc)

trials = Trials()
import time
start = time.time()
best = fmin(f_nn, space, algo=tpe.suggest, max_evals=5, trials=trials)
end = time.time()
print('\n\n\n')
print('time: ', end - start)
print('best: ')
print(np.sqrt(trials.losses()))
Example 10
    bagging_fraction=0.75,
    bagging_freq=5,
    bagging_seed=7,
    feature_fraction=0.5,
    feature_fraction_seed=7,
    verbose=-1,
    min_data_in_leaf=80,
    min_sum_hessian_in_leaf=11
)


# Fit the data
classifier.fit(X_train, y_train,)

# Make predictions on the hold out data
y_pred = (classifier.predict_proba(X_test)[:,1] >= 0.5).astype(int)

# Get the confusion matrix
print(confusion_matrix(y_test, y_pred))

# Get the accuracy score
print("Accuracy of {}".format(accuracy_score(y_test, y_pred)))

# Get the f1-Score
print("f1 score of {}".format(f1_score(y_test, y_pred)))

# Get the recall score
print("Recall score of {}".format(recall_score(y_test, y_pred)))

# Make predictions
predictions = (classifier.predict_proba(X_test_df)[:,1] >= 0.5).astype(int)
Example 11
class SemiSupLabeler():
    """
    @_init_: initialises the model
    - data_lab:     labelled data
    - data_unlab:   unlabelled data
    - data_submit:  the submit version of the data
    """
    def __init__(self, data_lab, data_unlab, data_submit):
        ###########################Default parameters#####################
        #NB: if some mandatory parameters are missing from the json, default values will be used
        #list of all potential parameters
        """
      @params_nn: parameters of the neural network
        - loss :             loss used for the NN, cf. the dictionary above
        - optimizer:         Adam, SGD, etc
        - learning rate:     speaks for itself
        - metrics:           accuracy, we won't normally change it
        - decay:             decay of the learning rate, generally of the order 1e-5
        - momentum:          momentum of the lr
        - patience:          number of epochs to wait, in early-stop mode, for the validation accuracy to increase again
        - layers:            shape of the network
      """
        self.params_nn = [
            'loss', 'optimizer', 'learning rate', 'metrics', 'decay',
            'momentum', 'batch_size', 'number of epochs', 'layers', 'patience'
        ]
        """
      @params_ss: parameters of label spreading
        - manyfit:           since the ss accuracy has some variance but doesn't take much to compute, manyfit defines
                             how many independent times we run it before averaging, in order to obtain a
                             better estimate of the accuracy in question
        - ss_model:          'LabSpr' or 'LabProp'. So far, only LabSpr has converged
        - ss_kernel:         'knn' or 'rbf'. So far only knn converges. ***WATCH OUT***: when using rbf,
                             Euler will complain that you use too much memory!!
        - gamma              parameter for the rbf
        - neighbor           parameter for knn
        - alpha              parameter for knn and rbf: tells to what extent you take the
                             information of your neighbors into account
      """
        self.params_ss = [
            'UsingSS', 'manyfit', 'ss_model', 'ss_kernel', 'gamma', 'neighbor',
            'alpha'
        ]
        """
      @param_list: list of all parameters
        - Ratio:              ratio represented by the training set
        - pca:                number of principal components to use. if not present, no pca will be done
        - UsingNN:            if set to false, the NN is not used.
        - data_state:         'save' or 'load'. If you want to train the NN only, without having to run the
                              ss algo again, do one run with data_state set to 'save',
                              and use data_state='load' for the next ones.
        - scaler:             'normal' or 'standard', describes the preprocessing before applying the pca
        - paramsout:          designates which parameters will be present in the output name
                              ==> put the ones you're playing with in order to easily see the difference
      """
        self.param_list = [
            'Ratio', 'pca', 'UsingNN', 'paramsout', 'data_state', 'scaler'
        ] + self.params_nn + self.params_ss

        self.param_out = ['Ratio', 'pca', 'optimizer', 'layers']

        self.data_lab = data_lab

        self.data_unlab = data_unlab

        self.data_submit = data_submit

        #--------------------- DATA IF NO JSON PROVIDED --------------------------

        #Training:
        self.RATIO = 0.9
        self.INPUT_DIM = 139

        #PCA:
        self.scaler = 'Standard'
        self.PCA_MODE = True
        self.pca = 50

        #Early stopping:
        self.EARLY_STOP_MODE = False
        self.patience = 50

        #NEURAL NETWORK:
        self.USING_NN = True

        self.USING_SS = False

        assert (self.USING_NN or self.USING_SS)

        self.loss = "sparse_categorical_crossentropy"

        self.opt = "SGD"

        self.lr = 0.001

        self.metric = "accuracy"

        self.decay = 0

        self.momentum = 0

        self.batch_size = 32

        self.epochs = 5

        self.lay_node = [("relu", 206), ('dropout', 0.33)]

        #Semi Supervised learning:
        self.datastate = 'save'

        self.ss_mod = 'LabSpr'

        self.ss_kern = 'knn'

        self.gamma = 20

        self.neighbors = 7

        self.alpha = 0.2

        self.manyfit = 1

        #-----------------------  JSON AS ARGUMENT: -------------------------

        #Checks whether the provided JSON is well formed:
        def check(inner, outer):
            for i in inner:
                if not (i in outer):
                    print('unknown parameter. abort.', i)

                    exit()

        self.JSON_MODE = (len(sys.argv) > 1)

        #In case a JSON was provided for the parameters:
        if (self.JSON_MODE):

            fn = sys.argv[1]

            if os.path.isfile(fn):

                print("successfully read the json file." + sys.argv[1])

                self.json_dict = json.load(open(fn))

                assert ('UsingNN' in self.json_dict and 'paramsout' in self.json_dict)

                self.USING_NN = self.json_dict['UsingNN']

                self.USING_SS = self.json_dict['UsingSS']

                check(self.json_dict, self.param_list)

                check(self.json_dict['paramsout'], self.param_list)

                #iterate over the printed parameters and ensure they exist:
                self.param_out = self.json_dict['paramsout']

                self.RATIO = self.json_dict['Ratio']

                self.ss_mod = self.json_dict['ss_model']

                self.ss_kern = self.json_dict['ss_kernel']

                self.gamma = self.json_dict['gamma']

                self.neighbors = self.json_dict['neighbor']

                self.alpha = self.json_dict['alpha']

                self.datastate = self.json_dict['data_state']

                self.scaler = self.json_dict['scaler']

                if ('manyfit' in self.json_dict):

                    self.manyfit = self.json_dict['manyfit']

                if (self.USING_NN):
                    self.loss = self.json_dict['loss']

                    self.opt = self.json_dict['optimizer']

                    self.lr = self.json_dict['learning rate']

                    self.metric = self.json_dict['metrics']

                    self.decay = self.json_dict['decay']

                    self.momentum = self.json_dict['momentum']

                    self.batch_size = self.json_dict['batch_size']

                    self.epochs = self.json_dict['number of epochs']

                    self.lay_node = self.json_dict['layers']

                self.PCA_MODE = ('pca' in self.json_dict)

                if (self.PCA_MODE):
                    self.pca = self.json_dict['pca']

                    self.INPUT_DIM = self.pca

                self.EARLY_STOP_MODE = ('patience' in self.json_dict)

                if (self.EARLY_STOP_MODE):
                    self.patience = self.json_dict['patience']

            else:
                print("incorrect path. abort.")

                print(sys.argv[1])

                exit()

        #if no JSON is provided, the values are taken from the code:
        else:
            print("taking the values of the code because no JSON was given.")

            #Dictionary of all the values of parameters used:
            self.json_dict = {
                'Ratio': self.RATIO,
                'UsingNN': self.USING_NN,
                'UsingSS': self.USING_SS,
                'ss_model': self.ss_mod,
                'ss_kernel': self.ss_kern,
                'loss': self.loss,
                'optimizer': self.opt,
                'learning rate': self.lr,
                'metrics': self.metric,
                'decay': self.decay,
                'momentum': self.momentum,
                'batch_size': self.batch_size,
                'number of epochs': self.epochs,
                'gamma': self.gamma,
                'neighbor': self.neighbors,
                'alpha': self.alpha,
                'layers': self.lay_node,
                'manyfit': self.manyfit,
                'scaler': self.scaler
            }

            if (self.PCA_MODE):
                self.json_dict['pca'] = self.pca

                self.INPUT_DIM = self.pca

            if (self.EARLY_STOP_MODE):
                self.json_dict['patience'] = self.patience

        self.build_output_name()

        #Tensorboard/log part:
        self.logs_base_dir = "./logs"

        os.makedirs(self.logs_base_dir, exist_ok=True)

        self.log_spec = os.path.join(self.logs_base_dir, self.output_name)

        os.makedirs(self.log_spec, exist_ok=True)

        self.init_variables()

    """
    @label_spr: performs label spreading
    """

    def label_spr(self):

        RESULT_ACC_SS = 0

        for i in range(self.manyfit):

            #Initialising variables:
            self.init_variables()

            #PCA preprocessing:
            if (self.PCA_MODE): self.pca_preprocess(self.pca)

            #Semi supervised algo
            if (self.ss_mod == 'LabSpr' and self.ss_kern == 'knn'):
                self.label_prop_model = LabelSpreading(
                    kernel='knn',
                    gamma=self.gamma,
                    n_neighbors=self.neighbors,
                    alpha=self.alpha)

            elif (self.ss_mod == 'LabProp' and self.ss_kern == 'rbf'):
                self.label_prop_model = LabelPropagation(
                    kernel='rbf',
                    gamma=self.gamma,
                    n_neighbors=self.neighbors,
                    alpha=self.alpha,
                    max_iter=10)
            else:
                self.label_prop_model = LabelPropagation(
                    kernel=self.ss_kern,
                    gamma=self.gamma,
                    n_neighbors=self.neighbors)

            print('Starting to fit. Run for shelter!')

            self.label_prop_model.fit(self.X_tot, self.y_tot)

            temp_acc = self.label_prop_model.score(self.X_valid_lab,
                                                   self.y_valid)

            print('{} / {} :accuracy = {}'.format(i, self.manyfit, temp_acc))

            RESULT_ACC_SS += temp_acc

        self.y_tot = self.label_prop_model.transduction_

        self.y_submit = self.label_prop_model.predict(self.X_submit)

        if (self.datastate == "save"):
            self.save_to_csv(self.X_tot, self.y_tot, self.X_valid_lab,
                             self.y_valid)

        RESULT_ACC_SS /= self.manyfit

        self.json_dict['ss_accuracy'] = RESULT_ACC_SS

        print('accuracy obtained on the test set of the ss algo:',
              RESULT_ACC_SS)

    """
       @labelspr_predict: returns the prediction of the label spreading
    """

    def labelspr_predict(self, X):
        return self.label_prop_model.predict(X)

    """
       @init_variables : transforms the input data so that it is usable 
    """

    def init_variables(self):
        X_submit = self.data_submit.to_numpy()

        X_big_lab = (self.data_lab.to_numpy())[:, 1:]

        y_big = ((self.data_lab.to_numpy())[:, 0]).astype(int)

        X_train_lab, X_valid_lab, self.y_train, self.y_valid = train_test_split(
            X_big_lab, y_big, test_size=(1 - self.RATIO), random_state=14)

        X_unlab = self.data_unlab.to_numpy()

        X_tot = np.concatenate((X_train_lab, X_unlab), axis=0)

        self.y_tot = np.concatenate((self.y_train, np.full(len(X_unlab), -1)))

        if (self.scaler == 'Standard'):
            scaler = StandardScaler()

        elif (self.scaler == 'Normal'):
            scaler = Normalizer()

        else:
            scaler = StandardScaler()

        self.X_tot = scaler.fit_transform(X_tot)

        self.X_train_lab = scaler.transform(X_train_lab)

        self.X_unlab = scaler.transform(X_unlab)

        self.X_valid_lab = scaler.transform(X_valid_lab)

        self.X_submit = scaler.transform(X_submit)

    """@pca_preprocess: performs the preprocessing before the PCA
    """

    def pca_preprocess(self, number):
        pca_mod = PCA(n_components=number)

        self.X_tot = pca_mod.fit_transform(self.X_tot)

        self.X_train_lab = pca_mod.transform(self.X_train_lab)

        self.X_unlab = pca_mod.transform(self.X_unlab)

        self.X_valid_lab = pca_mod.transform(self.X_valid_lab)

        self.X_submit = pca_mod.transform(self.X_submit)

        self.INPUT_DIM = number

    """@build_model: creates the model of the neural network 
    """

    def build_model(self):
        self.model = Sequential()

        for counter, (name, num) in enumerate(self.lay_node):
            if (counter == 0):
                self.model.add(
                    Dense(num, activation='relu', input_dim=self.INPUT_DIM))

            elif (name == 'dropout'):
                self.model.add(Dropout(rate=num))

            elif (name == 'relu'):
                self.model.add(Dense(num, activation=tf.nn.relu))

            elif (name == 'relu_bn'):
                self.model.add(Dense(num))

                self.model.add(BatchNormalization())

                self.model.add(Activation('relu'))

            else:
                print('incorrect name for the layers. exit.')
                exit()

        #Last layer of neural network:
        self.model.add(Dense(10, activation='softmax'))

        #optimizer
        if (self.opt == 'SGD'):
            optimiz = SGD(lr=self.lr, decay=self.decay, momentum=self.momentum)

        elif (self.opt == 'Adam'):
            optimiz = Adam(lr=self.lr, decay=self.decay)

        else:
            print('incorrect name for the optimizer. exit.')

            exit()

        self.model.compile(optimizer=optimiz,
                           loss=self.loss,
                           metrics=[self.metric])

    """
    @fit_lab: trains the neural network on labeled data
    """

    def fit_lab(self):
        temp = self.nn_fit(self.X_train_lab, self.y_train)

        self.json_dict["small_lab_dataset_nn_acc"] = temp

    """
    @fit_tot: trains the neural network on the total data
    """

    def fit_tot(self):
        temp = self.nn_fit(self.X_tot, self.y_tot)

        self.json_dict["big_dataset_nn_acc"] = temp

    def fit_tot_mesh(self):
        tableau = []
        tabl = []
        number_it = 10

        temp = self.nn_fit(self.X_tot, self.y_tot)

        for i in range(number_it):
            probabs_values = self.model.predict(self.X_submit)
            tableau.append(probabs_values)

        tabl = [(sum(x) / number_it) for x in zip(*tableau)]
        self.y_submit = np.array([np.argmax(i) for i in tabl])

    """
    @nn_fit: fits the neural network to input data X and y provided. 
    """

    def nn_fit(self, X, y):
        call_back_list = []

        #call_back_list.append(keras.callbacks.TensorBoard(self.log_spec,histogram_freq=1,write_grads=True))

        if (self.EARLY_STOP_MODE):
            call_back_list.append(
                EarlyStopping(patience=self.patience,
                              verbose=1,
                              mode='min',
                              restore_best_weights=True))
        self.model.fit(x=X,
                       y=y,
                       epochs=self.epochs,
                       batch_size=self.batch_size,
                       validation_data=(self.X_valid_lab, self.y_valid))

        test_loss, aut_acc = self.model.evaluate(self.X_valid_lab,
                                                 self.y_valid)

        y_temp = self.model.predict(self.X_submit)

        self.y_submit = np.array([np.argmax(i) for i in y_temp])

        return aut_acc

    """
    @complete_unlab: completes the unlabeled data by predicting the labels for it
    """

    def complete_unlab(self):
        y_missing = self.model.predict(self.X_unlab)

        y_missing = np.array([np.argmax(i) for i in y_missing])

        self.X_tot = np.concatenate((self.X_train_lab, self.X_unlab), axis=0)

        self.y_tot = np.concatenate((self.y_train, y_missing), axis=0)

    def probas_values(self):
        temp = self.nn_fit(self.X_train_lab, self.y_train)

        probas_val = self.model.predict_proba(self.X_unlab)

        return probas_val

    def mesh(self):
        tableau = []

        number_it = 20
        tabl = []

        for i in range(number_it):
            tableau.append(self.probas_values())

        tabl = [(sum(x) / number_it) for x in zip(*tableau)]

        #print((tabl[0]))
        predict = []
        """"
      for x in tabl:
        for j in range(len(x)):
          if max(x) == x[j]:
            predict.append(j)
      """

        #print(predict[0])
        predict = [np.argmax(i) for i in tabl]
        y_missing = predict

        self.X_tot = np.concatenate((self.X_train_lab, self.X_unlab), axis=0)

        self.y_tot = np.concatenate((self.y_train, y_missing), axis=0)

    def filtered_mesh(self):
        tableau = []

        number_it = 10

        tabl = []

        for i in range(number_it):
            tableau.append(self.probas_values())

        tabl = [(sum(x) / number_it) for x in zip(*tableau)]

        THRESHOLD_PROBAS = 0.7

        #If the maximum probability is below the threshold:
        truncated_tabl = [i for i in tabl if max(i) > THRESHOLD_PROBAS]
        print(len(truncated_tabl))

        #Find the indices of those points so they can be removed from the unlabeled set.
        indices = []

        for i in range(len(tabl)):
            if max(tabl[i]) <= THRESHOLD_PROBAS: indices.append(i)

        self.X_unlab_truncated = np.delete(self.X_unlab, indices, axis=0)
        print(len(self.X_unlab_truncated))

        #print((tabl[0]))
        #print(predict[0])

        #Only predict on the points above the threshold:
        predict = [np.argmax(i) for i in truncated_tabl]

        y_missing = predict

        self.X_tot = np.concatenate((self.X_train_lab, self.X_unlab_truncated),
                                    axis=0)

        self.y_tot = np.concatenate((self.y_train, y_missing), axis=0)

    """ @build_output_name: provides the name of the output with all the parameters
    """

    def build_output_name(self):
        self.output_name = (datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

        if (self.JSON_MODE):
            if ('origin' in os.path.basename(os.path.normpath(sys.argv[1]))):
                self.output_name += '_OR_'

            nn_string = 'NN:'

            ss_string = 'SS:'

            for i in self.param_out:
                temp = (i + '=' + str(self.json_dict[i]))

                if (i in self.params_nn):
                    nn_string += temp

                elif (i in self.params_ss):
                    ss_string += temp

                else:
                    self.output_name += temp

            self.output_name += ss_string

            if (self.USING_NN):
                self.output_name += nn_string

    """
    @submission_formed: gives the submission file the proper format
    - predicted_y : the predicted values
    - name: name containing all parameters
    """

    def submission_formed(self, predicted_y, name):
        result_dir = "./results"

        os.makedirs(result_dir, exist_ok=True)

        out = pd.DataFrame(predicted_y)

        out.insert(0, 'Id', range(30000, len(out) + 30000))

        out.rename(columns={"Id": "Id", 0: "y"}, inplace=True)

        path = 'results/' + name + '.csv'

        out.to_csv(os.path.join(path), index=False)

    """
    @save_to_csv: useful when self.datastate is set to 'save': saves the data obtained after the ss algorithm
    """

    def save_to_csv(self, X_tot, y_tot, X_valid, y_valid):
        out_x = pd.DataFrame(X_tot)

        out_y = pd.DataFrame(y_tot)

        out_xv = pd.DataFrame(X_valid)

        out_yv = pd.DataFrame(y_valid)

        os.makedirs('./saved_datas', exist_ok=True)

        path_x = 'saved_datas/X_tot.csv'

        path_y = 'saved_datas/y_tot.csv'

        path_xv = 'saved_datas/X_valid.csv'

        path_yv = 'saved_datas/y_valid.csv'

        out_x.to_csv(os.path.join(path_x), index=False)

        out_y.to_csv(os.path.join(path_y), index=False)

        out_xv.to_csv(os.path.join(path_xv), index=False)

        out_yv.to_csv(os.path.join(path_yv), index=False)

    """
    @load_xy: when self.datastate is set to 'load', loads data from saved data
    """

    def load_xy(self):
        print('Loading the X and y...')

        self.X_valid_lab = (pd.read_csv('saved_datas/X_valid.csv')).to_numpy()

        self.y_valid = (pd.read_csv('saved_datas/y_valid.csv')).to_numpy()

        self.X_tot = (pd.read_csv('saved_datas/X_tot.csv')).to_numpy()

        self.y_tot = (pd.read_csv('saved_datas/y_tot.csv')).to_numpy()

    """@out: final output of the programm 
    """

    def out(self):
        self.submission_formed(self.y_submit, self.output_name)

        with open(self.log_spec + '/recap.json', 'w') as fp:
            json.dump(self.json_dict, fp, indent=1)

        print(
            '########################################DONE##################################'
        )

        print("\n")
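
A hypothetical driver for SemiSupLabeler, assuming the three pandas dataframes are loaded beforehand and that a JSON config path may optionally be passed in sys.argv[1]:

labeler = SemiSupLabeler(data_lab, data_unlab, data_submit)
if labeler.USING_SS:
    labeler.label_spr()      # label spreading over labelled + unlabelled data
if labeler.USING_NN:
    labeler.build_model()
    labeler.fit_tot()        # train the network on the completed dataset
labeler.out()                # write results/<name>.csv and logs/<name>/recap.json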
Example 12
class YApplyTimeSeries(object):
    def __init__(self):
        # data prepare
        self.__df = None
        self.__train_feature_label, self.__test_feature_label = None, None
        self.__train_feature, self.__train_label = None, None
        self.__test_feature, self.__test_label = None, None
        self.__mms = None

        # function set
        self.__net = None

        # optimizer function

        # pick the best function

    def data_prepare(self):
        self.__df = pd.read_csv("C:\\Users\\Dell\\Desktop\\time_series.csv",
                                encoding="utf-16")
        self.__df = self.__df.dropna()
        self.__train_feature_label = self.__df.loc[(
            self.__df["is_oot"] == 0), :]
        self.__test_feature_label = self.__df.loc[(
            self.__df["is_oot"] == 1), :]
        self.__train_feature_label = self.__train_feature_label.drop(
            ["id_no", "is_oot"], axis=1)
        self.__test_feature_label = self.__test_feature_label.drop(
            ["id_no", "is_oot"], axis=1)

        self.__train_feature = self.__train_feature_label[[
            i for i in self.__train_feature_label.columns if i != "is_overdue"
        ]].values
        self.__train_label = self.__train_feature_label["is_overdue"].values
        self.__test_feature = self.__test_feature_label[[
            i for i in self.__test_feature_label.columns if i != "is_overdue"
        ]].values
        self.__test_label = self.__test_feature_label["is_overdue"].values
        # Min-max feature scaling
        self.__mms = MinMaxScaler()
        self.__mms.fit(self.__train_feature)
        self.__train_feature = self.__mms.transform(self.__train_feature)
        self.__test_feature = self.__mms.transform(self.__test_feature)
        # reshape samples × input_length × input_dim
        self.__train_feature = self.__train_feature.reshape((-1, 5, 3))
        self.__test_feature = self.__test_feature.reshape((-1, 5, 3))

    def function_set(self):
        self.__net = Sequential()
        self.__net.add(GRU(units=5, input_length=5, input_dim=3))
        self.__net.add(Dense(units=1, activation="sigmoid"))

    def optimizer_function(self):
        self.__net.summary()
        self.__net.compile(loss=keras.losses.binary_crossentropy,
                           optimizer=keras.optimizers.Adam(),
                           metrics=["accuracy"])

    def pick_the_best_function(self):
        self.__net.fit(self.__train_feature,
                       self.__train_label,
                       epochs=2,
                       batch_size=256)
        print(
            roc_auc_score(self.__test_label,
                          self.__net.predict_proba(self.__test_feature)))
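
A driver sketch for YApplyTimeSeries, assuming the CSV path hard-coded in data_prepare exists and contains the expected id_no, is_oot and is_overdue columns:

ts = YApplyTimeSeries()
ts.data_prepare()              # load, split, scale, reshape to (samples, 5, 3)
ts.function_set()              # GRU + sigmoid output
ts.optimizer_function()        # compile with Adam / binary cross-entropy
ts.pick_the_best_function()    # fit and print the test-set ROC AUC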
Example 13
class DeepLearning:
    def __init__(self, x_shape):
        from keras import Sequential
        from keras.callbacks import EarlyStopping
        from keras.layers import Dense, Dropout
        from keras.regularizers import l1_l2
        self.early_stopping = EarlyStopping(
            monitor='val_loss',
            min_delta=0,
            patience=50,
            mode='min',
            verbose=1,
        )
        self.classifier = Sequential()
        self.classifier.add(
            Dense(300,
                  kernel_initializer="he_normal",
                  activation="elu",
                  input_dim=x_shape))
        self.classifier.add(Dropout(0.3))
        self.classifier.add(
            Dense(450, kernel_initializer='he_normal', activation='elu'))
        self.classifier.add(Dropout(0.3))
        self.classifier.add(
            Dense(100, kernel_initializer='he_normal', activation='elu'))
        self.classifier.add(Dropout(0.3))
        self.classifier.add(
            Dense(20,
                  kernel_initializer='he_normal',
                  activation='elu',
                  kernel_regularizer=l1_l2()))
        self.classifier.add(
            Dense(1,
                  kernel_initializer='uniform',
                  activation="sigmoid",
                  activity_regularizer=l1_l2(0.005, 0.005)))

        self.classifier.compile(loss='binary_crossentropy',
                                optimizer='adam',
                                metrics=['accuracy'])

    # TODO: implementation using keras
    def learn(self,
              x_train,
              y_train,
              x_test,
              batch_size=10,
              is_stand_ml=False,
              is_al=False):
        self.classifier.fit(x_train,
                            y_train,
                            validation_split=0.1,
                            callbacks=[self.early_stopping],
                            epochs=100,
                            batch_size=64,
                            verbose=0)
        probs = self.classifier.predict_proba(x_test)

        if is_stand_ml:
            return probs
        if is_al:
            certainty = [abs(a - 0.5) for a in probs]
            return np.argpartition(certainty, -batch_size)[-batch_size:]
        return np.argpartition(probs[:, 1], -batch_size)[-batch_size:]
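
A usage sketch for the DeepLearning class, assuming numpy feature matrices and 0/1 labels (the data variables are hypothetical):

dl = DeepLearning(x_shape=x_train.shape[1])                      # x_train is a hypothetical feature matrix
probs = dl.learn(x_train, y_train, x_test, is_stand_ml=True)     # fit and return predicted probabilities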
Example 14
class Agent:
    # name should contain only letters, digits, and underscores (not enforced by environment)
    __name = 'Based_Agent'

    def __init__(self, stateDim, actionDim, agentParams):
        self.__stateDim = stateDim
        self.__actionDim = actionDim
        self.__action = np.random.random(actionDim)
        self.__step = 0

        self.__alpha = 0.001
        self.__gamma = 0.9
        self.__decision_every = 6
        self.__explore_probability = 0.2
        self.__max_replay_samples = 20

        self.__features = Features()
        self.__previous_action = None
        self.__current_out = None
        self.__previous_out = None
        self.__previous_meta_state = None
        self.__previous_state = None

        self.__test = agentParams[0] if agentParams else None
        self.__exploit = False

        self.__segments = 2
        self.__actions = 3**self.__segments

        try:
            self.__net = load_model('net')
        except:
            print('Creating new model')
            self.__net = Sequential([
                Dense(50, activation='elu', input_dim=self.__features.dim),
                Dense(30, activation='elu'),
                Dense(self.__actions),
                Reshape((self.__actions, 1))
            ])

        self.__net.compile(optimizer=SGD(lr=self.__alpha), loss='mean_squared_error', sample_weight_mode='temporal')

        try:
            self.__replay = Replay.load('replay')
        except Exception as a:
            self.__replay = Replay(self.__actions)

        self.__replay_X = []
        self.__replay_Y = []

    def start(self, state):
        self.__previous_state = state

        self.__choose_action(state)

        self.__previous_out = self.__current_out

        return self.__action

    def step(self, reward, state):
        self.__previous_state = state

        self.__step += 1
        if self.__step % self.__decision_every != 0:
            return self.__action

        self.__choose_action(state)

        if not self.__exploit:
            max_q = self.__current_out[np.argmax(self.__current_out)]
            self.__update_q(reward - self.__features.min_dist(state) / 100, max_q)

        self.__previous_out = self.__current_out

        return self.__action

    def end(self, reward):
        if not self.__exploit:
            self.__update_q(reward, reward)
            self.__replay.submit(self.__test, (self.__replay_X, self.__replay_Y), self.__step)
            self.__net.save('net')
            self.__replay.save('replay')

    def cleanup(self):
        pass

    def getName(self):
        return self.__name

    def __choose_action(self, state):
        meta_state = np.asarray(self.__features.get_features(state), dtype='float').reshape((1, self.__features.dim))
        out = self.__net.predict_proba([meta_state], batch_size=1)[0].flatten()

        self.__current_out = out

        if self.__exploit or self.__explore_probability < np.random.random():
            # take best action
            action = np.argmax(out)
        else:
            # take random action
            action = np.random.randint(0, self.__actions)

        self.__previous_action = action
        self.__previous_meta_state = meta_state

        self.__meta_to_action(action)

    def __update_q(self, reward, max_q):
        teach_out = self.__previous_out
        teach_out[self.__previous_action] = reward + self.__gamma * max_q

        # sampling from infinite stream
        if len(self.__replay_X) < self.__max_replay_samples:
            self.__replay_X.append(self.__previous_meta_state)
            self.__replay_Y.append((teach_out[self.__previous_action], self.__previous_action))

        elif np.random.random() < self.__max_replay_samples/self.__step:
            to_replace = np.random.randint(0, self.__max_replay_samples)
            self.__replay_X[to_replace] = self.__previous_meta_state
            self.__replay_Y[to_replace] = (teach_out[self.__previous_action], self.__previous_action)

        self.__net.fit([self.__previous_meta_state], [teach_out.reshape(1, self.__actions, 1)], verbose=0)

        replay_x, replay_y, replay_w = self.__replay.get_training()
        if replay_x:
            data = list(zip(replay_x, replay_y, replay_w))
            np.random.shuffle(data)
            for x, y, w in data:
                self.__net.fit([x], [y], sample_weight=[w], verbose=0)

    def __meta_to_action(self, meta):

        self.__action[:] = 0

        for segment in range(self.__segments):
            segment_action = meta % 3

            muscle_start = 30 * segment // self.__segments
            muscle_stop = 30 * (segment+1) // self.__segments

            if segment_action == 0:
                self.__action[muscle_start:muscle_stop:3] = 1

            if segment_action == 1:
                self.__action[muscle_start+1:muscle_stop:3] = 1

            if segment_action == 2:
                self.__action[muscle_start+2:muscle_stop:3] = 1

            meta //= 3
Example 15
fpr_keras, tpr_keras, threshold_keras = roc_curve(y_test, y_test_pred)
auc_keras = auc(fpr_keras, tpr_keras)
print('Testing data AUC', auc_keras)

# ROC curve for testing data
plt.figure(1)
plt.plot([0, 1], [0, 1], 'k--')
plt.plot(fpr_keras, tpr_keras, label='Keras (area={:.3f})'.format(auc_keras))
plt.xlabel('False positive rate')
plt.ylabel('True Positive rate')
plt.title('ROC Curve')
plt.legend(loc='best')
plt.show()

# AUC score of training
y_train_pred = model.predict_proba(X_train)
fpr_keras, tpr_keras, threshold_keras = roc_curve(y_train, y_train_pred)
auc_keras = auc(fpr_keras, tpr_keras)
print('Training data AUC:', auc_keras)

# ROC curve of training
plt.figure(1)
plt.plot([0, 1], [0, 1], 'k--')
plt.plot(fpr_keras, tpr_keras, label='Keras (area={:.3f})'.format(auc_keras))
plt.xlabel('False Positive rate')
plt.ylabel('True Positive rate')
plt.title('ROC Curve')
plt.legend(loc='best')
plt.show()

# make y_train categorical and assign it to y_train_cat
Example 16
if float(conf_mat[0,0]+conf_mat[0,1])!=0:
    precision = float(conf_mat[0,0])/float(conf_mat[0,0]+conf_mat[0,1])
#f1 score = 0
f1_score = 2*(float(precision*recall)/float(precision+recall))
print("confusion matrix")
print("----------------------------------------------")
print("accuracy")
print("%.6f" %accuracy)
print("recall")
print("%.6f" %recall)
print("precision")
print("%.6f" %precision)
print("f1score")
print("%.6f" %f1_score)

y_pred_proba = model.predict_proba(xtest)[::,1]
fpr, tpr, _ = metrics.roc_curve(y_true,  y_pred_proba)
auc = metrics.roc_auc_score(y_true, y_pred_proba)
plt.plot(fpr,tpr,label="data 1, auc="+str(auc))
plt.legend(loc=4)
plt.show()

##############################################################################

# Get training and validation accuracy histories
training_loss = history.history['accuracy']
test_loss = history.history['val_accuracy']

# Create count of the number of epochs
epoch_count = range(1, len(training_loss) + 1)
Example 17
class Learning:
    def machine_learning(self,
                         x_train,
                         y_train,
                         x_test,
                         smallest_class,
                         clf=None):
        # smallest class -> 1/0
        if clf is None:
            # n_estimators - number of trees
            # class_weight="balanced" reweights classes inversely to their frequencies
            clf = RandomForestClassifier(n_estimators=200,
                                         class_weight="balanced")
        clf.fit(np.asmatrix(x_train, dtype=np.float32), y_train)
        probs = clf.predict_proba(x_test)
        # probs closer to 0 -> 0
        return probs.argmax(0)[smallest_class]  # smallest class black

    # TODO: implementation using keras
    def deep_learning(self, x_train, y_train, x_test, smallest_class):
        # pass
        from keras import Sequential
        from keras.callbacks import EarlyStopping
        from keras.layers import Dense, Dropout
        from keras.regularizers import l1_l2
        # stop if there is no improvement
        early_stopping = EarlyStopping(monitor='val_loss',
                                       min_delta=0,
                                       patience=50,
                                       mode='min',
                                       verbose=1)
        self.classifier = Sequential()
        # he_normal - init weights normal
        self.classifier.add(
            Dense(300,
                  kernel_initializer="he_normal",
                  activation="relu",
                  input_dim=x_train.shape[1]))
        self.classifier.add(Dropout(0.5))
        self.classifier.add(
            Dense(100,
                  kernel_initializer='he_normal',
                  activation='relu',
                  kernel_regularizer=l1_l2(0.5)))
        self.classifier.add(Dropout(0.5))
        self.classifier.add(
            Dense(20,
                  kernel_initializer='he_normal',
                  activation='relu',
                  kernel_regularizer=l1_l2(0.5)))
        self.classifier.add(Dropout(0.5))
        self.classifier.add(
            Dense(1,
                  kernel_initializer='uniform',
                  activation="sigmoid",
                  kernel_regularizer=l1_l2(0.1)))

        self.classifier.compile(loss='binary_crossentropy',
                                optimizer='adam',
                                metrics=['accuracy'])
        self.classifier.fit(x_train,
                            y_train,
                            validation_split=0.1,
                            callbacks=[early_stopping],
                            epochs=10,
                            batch_size=520,
                            verbose=0)
        probs = self.classifier.predict_proba(x_test)
        return probs.argmax()
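# Hedged usage sketch with synthetic data (an assumption, not part of the
# original example): machine_learning() returns the index of the test sample
# with the highest predicted probability for `smallest_class`.
if __name__ == '__main__':
    import numpy as np
    rng = np.random.RandomState(0)
    x_train = rng.rand(200, 5)
    y_train = rng.randint(0, 2, size=200)
    x_test = rng.rand(40, 5)

    learner = Learning()
    idx = learner.machine_learning(x_train, y_train, x_test, smallest_class=1)
    print('Index of the most likely smallest-class sample:', idx)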
Ejemplo n.º 18
0
# Create NN model
model = Sequential()
model.add(Dense(2, input_dim=2, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer=SGD(lr=0.1))

print(model.summary())

# Training
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])
model.fit(X, y, batch_size=1, epochs=1000, verbose=0)

# Result
print("Network test:")
print("XOR(0,0):", model.predict_proba(np.array([[0, 0]])))
print("XOR(0,1):", model.predict_proba(np.array([[0, 1]])))
print("XOR(1,0):", model.predict_proba(np.array([[1, 0]])))
print("XOR(1,1):", model.predict_proba(np.array([[1, 1]])))

# Parameters layer 1
W1 = model.get_weights()[0]
b1 = model.get_weights()[1]
# Parameters layer 2
W2 = model.get_weights()[2]
b2 = model.get_weights()[3]

print("W1:", W1)
print("b1:", b1)
print("W2:", W2)
print("b2:", b2)
Ejemplo n.º 19
0
y_predicted = model.predict(x_test)
#print(y_predicted.shape)
for i in range(test_instances):
    index = y_predicted[i].argmax()
    y_predicted[i] = [0, 0]
    y_predicted[i, index] = 1

accuracy = accuracy_score(y_test_matrix, y_predicted)
print("accuracy = ", accuracy)
print(
    'Balanced accuracy: ',
    balanced_accuracy_score(y_test_matrix.argmax(axis=1),
                            y_predicted.argmax(axis=1)))
print('f measure = ',
      f1_score(y_test_matrix.argmax(axis=1), y_predicted.argmax(axis=1)))
conf_matrix = confusion_matrix(y_test_matrix.argmax(axis=1),
                               y_predicted.argmax(axis=1))
print("Confusion matrix : \n", conf_matrix)

skplt.metrics.plot_roc(y_test,
                       model.predict_proba(x_test),
                       plot_macro=False,
                       plot_micro=False,
                       classes_to_plot=[1])
plt.show()
""" fpr, tpr, thresholds = roc_curve(y_test_matrix.argmax(axis=1), y_predicted.argmax(axis=1))

plt.figure(2)
plt.plot(fpr, tpr)
plt.show() """
Ejemplo n.º 20
0
if float(conf_mat[0, 0] + conf_mat[0, 1]) != 0:
    precision = float(conf_mat[0, 0]) / float(conf_mat[0, 0] + conf_mat[0, 1])
# f1 score
f1_score = 2 * (float(precision * recall) / float(precision + recall))
print("confusion matrix")
print("----------------------------------------------")
print("accuracy")
print("%.3f" % accuracy)
print("recall")
print("%.3f" % recall)
print("precision")
print("%.3f" % precision)
print("f1score")
print("%.3f" % f1_score)

y_pred_proba = classifier.predict_proba(xtest)[::, 1]
fpr, tpr, _ = metrics.roc_curve(y_true, y_pred_proba)
auc = metrics.roc_auc_score(y_true, y_pred_proba)
plt.plot(fpr, tpr, label="data 1, auc=" + str(auc))
plt.legend(loc=4)
plt.show()

plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()
print("runtime:" + str(datetime.now() - start))
#print("runtime:" + str(time.time()-start))
Ejemplo n.º 21
0
end1 = time.clock()
t1 = end1 - start1
model.save('my_model3.h5')
#record the testing time
start2 = time.clock()
loss, accuracy = model.evaluate(x_test, y_test)
end2 = time.clock()
t2 = end2 - start2

#compute the evaluation metrics
pre_y = model.predict_classes(x_test)
y_test = np.array(y_test)
metrics = classification_report(y_test, pre_y,digits=4)
print(metrics)
confusion_m = confusion_matrix(y_test, pre_y)
y_pred_pro = model.predict_proba(x_test)[:, 0]
fpr, tpr, thresholds = roc_curve(y_test, y_pred_pro, pos_label=1)
roc_auc = auc(fpr, tpr)
mat_plt(history)
plot_confusion_matrix(confusion_m)
roc(fpr, tpr, roc_auc)
model.save('my_model3.h5')
#compute FPR and TPR
def fpr_tpr(confusion_m):
    sum = 0
    count = 0
    k = 0
    lubao = []
    for i in confusion_m:
        for j in i:
            sum = sum + j
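# The function above is cut off in this excerpt. A hedged, self-contained
# sketch of the same idea, assuming a 2x2 confusion matrix laid out as
# [[TN, FP], [FN, TP]] (sklearn's convention):
def fpr_tpr_sketch(confusion_m):
    tn, fp, fn, tp = confusion_m.ravel()
    fpr = fp / (fp + tn) if (fp + tn) else 0.0  # false positive rate
    tpr = tp / (tp + fn) if (tp + fn) else 0.0  # true positive rate (recall)
    return fpr, tpr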
Ejemplo n.º 22
0
print(train_labels[:10])
NUM_DIGITS = 10
trainLabels = utils.to_categorical(train_labels, NUM_DIGITS)
testLabels = utils.to_categorical(test_labels, NUM_DIGITS)

model = Sequential()
model.add(Dense(units=128, activation=tf.nn.relu, input_shape=(FLATTEN_DIM, )))
model.add(Dense(units=64, activation=tf.nn.relu))
model.add(Dense(units=10, activation=tf.nn.softmax))
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])
print(model.summary())
cb2 = TensorBoard(log_dir="logs/demo71",
                  histogram_freq=0,
                  write_graph=True,
                  write_images=True)
model.fit(trainImages,
          trainLabels,
          epochs=100,
          validation_data=(testImages, testLabels),
          callbacks=[cb1, cb2])
predictedLabels = model.predict_classes(testImages)
print("result:", predictedLabels[:10])
predictedProbs = model.predict_proba(testImages)
print("result:", predictedProbs[:10])
predicted = model.predict(testImages)
print('result:', predicted[:10])
loss, accuracy = model.evaluate(testImages, testLabels)
print("test accuracy:%.4f" % accuracy)
Ejemplo n.º 23
0
# cp_callback = keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
#                                                  save_weights_only=True,
#                                                  verbose=1)

## Fit model for multiple labels and print accuracy
## 2296*1130 = 2594485-5
# history= model.fit(X_train, Y_train, validation_split=0.3,batch_size=10000, epochs=50,callbacks=[cp_callback])
history = model.fit(X_train,
                    Y_train,
                    validation_split=0.3,
                    batch_size=10000,
                    epochs=50,
                    verbose=2)

pred = model.predict(X_test, verbose=1)
pred_proba = model.predict_proba(X_test)
pred[pred >= 0.5] = 1
pred[pred < 0.5] = 0
# print('pred: ', pred)
# print('Y_test: ', Y_test)

conf_mat = multilabel_confusion_matrix(Y_test, pred)
# print('conf mat: ')
# print(conf_mat)

# summarize history for accuracy
ExtraSensoryHelperFunctions.PlotEpochVsAcc(plt, history)

# summarize history for loss
ExtraSensoryHelperFunctions.PlotEpochVsLoss(plt, history)
Ejemplo n.º 24
0
class SafetyModelByCnnRandomForestStack(SafetyModel):
    MODEL_TYPE = 'safety-cnn-rf-v0'
    CNN_FEATURES = [
        'acceleration_x', 'acceleration_y', 'acceleration_z',
        'acceleration_gravity_diff_magnitude', 'Bearing', 'gyro_x_filtered',
        'gyro_y_filtered', 'gyro_z_filtered', 'gyro_filtered_magnitude',
        'Speed', 'Accuracy', 'second', 'second_diff', 'orientation_theta',
        'orientation_psi', 'orientation_phi'
    ]
    SEQUENCE_MAX_LEN = 200

    def __init__(self):
        super(SafetyModelByCnnRandomForestStack,
              self).__init__(self.MODEL_TYPE)
        self._model_first = None
        self._model_second = None
        self._features = None

    def build(self, data: pd.DataFrame, label: pd.DataFrame):
        print('Preprocess data ...')
        train_label = self.preprocess_label(label)
        train_dataset_prep = SafetyModel._preprocess(data)

        print('Aggregate data ...')
        train_agg_data = self._aggregate_data(train_dataset_prep)

        print('Preprocess data - To CNN input format ...')
        train_dataset_cnn, train_booking_ids = self._to_cnn_dataset(
            train_dataset_prep)
        del train_dataset_prep

        # First Step: CNN Model
        cnn_model = self._create_model_cnn(train_dataset_cnn)
        cnn_model.compile(loss='binary_crossentropy',
                          optimizer='adam',
                          metrics=['binary_accuracy'])
        train_label_cnn = pd.merge(train_booking_ids,
                                   train_label,
                                   on='bookingID').label

        callbacks_list = [EarlyStopping(monitor='binary_accuracy', patience=3)]
        print('Building CNN model ...')
        history = cnn_model.fit(train_dataset_cnn,
                                np.array(train_label_cnn).reshape((-1, 1)),
                                batch_size=4,
                                epochs=50,
                                callbacks=callbacks_list,
                                validation_split=0.2,
                                verbose=1)
        print('Done learning CNN model.')

        self._model_first = Sequential()
        for layer in cnn_model.layers[:-1]:
            self._model_first.add(layer)
        for layer in self._model_first.layers:
            layer.trainable = False

        # Second Step: Stacking Random Forest
        train_cnn_embed = self._model_first.predict_proba(train_dataset_cnn)
        agg_features = train_agg_data.columns[
            train_agg_data.columns.str.contains("max|std|ratio")]
        train_data_stack = pd.concat([
            pd.Series(train_booking_ids.bookingID),
            train_agg_data[agg_features],
            pd.DataFrame(train_cnn_embed,
                         columns=[
                             'cnn_result_' + str(i)
                             for i in range(len(train_cnn_embed[0]))
                         ])
        ],
                                     axis=1)
        train_data_stack = pd.merge(train_data_stack,
                                    train_label,
                                    on='bookingID')
        self._features = train_data_stack.columns[
            train_data_stack.columns != 'label']

        self._model_second = RandomForestClassifier(n_estimators=200,
                                                    random_state=0,
                                                    min_samples_leaf=75)
        self._model_second.fit(train_data_stack[self._features],
                               train_data_stack.label)

    def save(self, path: str):
        obj = {
            'model_type': self._model_type,
            'features': self._features,
            'model_first': self._model_first,
            'model_second': self._model_second
        }
        joblib.dump(obj, path, protocol=2)

    def load(self, path: str):
        obj = joblib.load(path)
        if obj['model_type'] != self.MODEL_TYPE:
            raise ValueError(
                'Incompatible type to load. Expect {} but get {}'.format(
                    self.MODEL_TYPE, obj['model_type']))
        self._features = obj['features']
        self._model_first = obj['model_first']
        self._model_second = obj['model_second']

    def predict(self, data: pd.DataFrame) -> pd.DataFrame:
        if (self._model_first is None) or (self._model_second is None):
            raise AttributeError(
                'Model is not available. Build or load the model beforehand.')

        print('Preprocess data ...')
        test_dataset_prep = SafetyModel._preprocess(data)

        print('Aggregate data ...')
        test_agg_data = self._aggregate_data(test_dataset_prep)

        print('Preprocess data - To CNN input format ...')
        test_dataset_cnn, test_booking_ids = self._to_cnn_dataset(
            test_dataset_prep)
        del test_dataset_prep

        # First Step: CNN Model
        test_cnn_embed = self._model_first.predict_proba(test_dataset_cnn)

        # Second Step: Stacking Random Forest
        agg_features = test_agg_data.columns[
            test_agg_data.columns.str.contains("max|std|ratio")]
        test_data_stack = pd.concat([
            pd.Series(test_booking_ids.bookingID), test_agg_data[agg_features],
            pd.DataFrame(test_cnn_embed,
                         columns=[
                             'cnn_result_' + str(i)
                             for i in range(len(test_cnn_embed[0]))
                         ])
        ],
                                    axis=1)

        prediction = self._model_second.predict_proba(
            test_data_stack[self._features])
        prediction = prediction[:,
                                np.argwhere(
                                    self._model_second.classes_ == 1)[0][0]]
        prediction_df = pd.DataFrame(data={
            'bookingID': test_data_stack.bookingID,
            'prediction': prediction
        })
        return prediction_df

    @staticmethod
    def _create_model_cnn(dataset):
        num_seq = len(dataset[0])
        num_features = len(dataset[0][0])

        inpt = Input(shape=(num_seq, num_features))

        convs = []

        conv1 = Conv1D(8, 1, activation='relu')(inpt)
        pool1 = GlobalMaxPooling1D()(conv1)
        convs.append(pool1)

        conv2 = Conv1D(8, 3, activation='relu')(inpt)
        pool2_1 = AveragePooling1D(pool_size=5)(conv2)
        conv2_1 = Conv1D(16, 3, activation='relu')(pool2_1)
        pool2_2 = GlobalMaxPooling1D()(conv2_1)
        convs.append(pool2_2)

        out = Concatenate()(convs)
        first_segment_model = Model(inputs=[inpt], outputs=[out])

        model = Sequential()
        model.add(first_segment_model)
        model.add(Dropout(0.2))
        model.add(Dense(16, activation='sigmoid'))
        model.add(Dense(1, activation='sigmoid'))

        print(first_segment_model.summary())
        print(model.summary())
        return model

    def _to_cnn_dataset(
            self, preprocessed_dataset: pd.DataFrame) -> (list, pd.DataFrame):
        data_cnn = preprocessed_dataset.copy()

        data_cnn[['acceleration_x', 'acceleration_y', 'acceleration_z']] = \
            data_cnn[['acceleration_x', 'acceleration_y', 'acceleration_z']] / 10.0
        data_cnn[['gyro_x_filtered', 'gyro_y_filtered', 'gyro_z_filtered']] = \
            data_cnn[['gyro_x_filtered', 'gyro_y_filtered', 'gyro_z_filtered']]
        data_cnn['Bearing'] = data_cnn['Bearing'] / 360.0
        data_cnn[['orientation_theta', 'orientation_psi', 'orientation_phi']] = \
            data_cnn[['orientation_theta', 'orientation_psi', 'orientation_phi']] / 180.0
        data_cnn['Speed'] = data_cnn['Speed'] / 35.0
        data_cnn['second'] = data_cnn['second'] / 1750.0
        data_cnn['second_diff'] = data_cnn['second_diff'] / 30.0
        data_cnn['Accuracy'] = data_cnn['Accuracy'] / 15.0

        data_cnn, booking_ids = self._to_keras_input(data_cnn,
                                                     self.CNN_FEATURES,
                                                     self.SEQUENCE_MAX_LEN)
        return data_cnn, booking_ids
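    # _to_keras_input() is not shown in this excerpt. A hedged sketch of what such
    # a helper could do (an assumption, not the original implementation): group the
    # rows by bookingID, keep the CNN feature columns, and pad/truncate every trip
    # to max_len time steps so the result stacks into (n_trips, max_len, n_features).
    def _to_keras_input_sketch(self, data: pd.DataFrame, features, max_len):
        from keras.preprocessing.sequence import pad_sequences
        booking_ids = data[['bookingID']].drop_duplicates().reset_index(drop=True)
        sequences = [
            data.loc[data.bookingID == bid, features].values
            for bid in booking_ids.bookingID
        ]
        padded = pad_sequences(sequences, maxlen=max_len, dtype='float32',
                               padding='post', truncating='post')
        return padded, booking_ids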
Ejemplo n.º 25
0
    def fitting(self):   
   
        dim_row = self.lags   # time steps
        dim_col = 1    # features or channels (Volume)
        output_dim = 3  # 3 for categorical
        
        
        #data = np.random.random((1000, dim_row, dim_col))
        #clas = np.random.randint(3, size=(1000, 1))
        ##print(clas)
        #clas = to_categorical(clas)
        ##print(clas)
        data = self.X_train
        data_test = self.X_test
                
        data = data.values.reshape(-1, dim_row, dim_col)
        data_test = data_test.values.reshape(-1, dim_row, dim_col)
        
        clas = self.y_train
        clas_test = self.y_test 
        clas = to_categorical(clas)
        clas_test = to_categorical(clas_test)

        cat0 = self.y_train.tolist().count(0)
        cat1 = self.y_train.tolist().count(1)
        cat2 = self.y_train.tolist().count(2)
        
        print("may: ", cat1, "  ", "menor: ", cat2, " ", "neutro: ", cat0)
        
        n_samples_0 = cat0
        n_samples_1 = (cat1 + cat2)/2.0
        n_samples_2 = (cat1 + cat2)/2.0

        class_weight={
                0: 1.0,
                1: n_samples_0/n_samples_1,
                2: n_samples_0/n_samples_2}            
        
        def class_1_accuracy(y_true, y_pred):
        # taken from: http://www.deepideas.net/unbalanced-classes-machine-learning/
            class_id_true = K.argmax(y_true, axis=-1)
            class_id_preds = K.argmax(y_pred, axis=-1)
            
            accuracy_mask = K.cast(K.equal(class_id_preds, 1), 'int32')
            class_acc_tensor = K.cast(K.equal(class_id_true, class_id_preds), 'int32') * accuracy_mask
            
            class_acc = K.sum(class_acc_tensor) / K.maximum(K.sum(accuracy_mask), 1)
            return class_acc
        
        
        class SecondOpinion(Callback):
            def __init__(self, model, x_test, y_test, N):
                self.model = model
                self.x_test = x_test
                self.y_test = y_test
                self.N = N
                self.epoch = 1
        
            def on_epoch_end(self, epoch, logs={}):
                if self.epoch % self.N == 0:
                    y_pred = self.model.predict(self.x_test)
                    pred_T = 0
                    pred_F = 0
                    for i in range(len(y_pred)):
                        if np.argmax(y_pred[i]) == 1 and np.argmax(self.y_test[i]) == 1:
                            pred_T += 1
                        if np.argmax(y_pred[i]) == 1 and np.argmax(self.y_test[i]) != 1:
                            pred_F += 1
                    if pred_T + pred_F > 0:
                        Pr_pos = pred_T/(pred_T + pred_F)
                        print("Yoe: epoch, Probabilidad pos: ", self.epoch, Pr_pos)
                    else:
                        print("Yoe Probabilidad pos: 0")
                self.epoch += 1
        
        
        
        
        
#################################################################################################################        
        model = Sequential()
#        model.add(Reshape(input_shape=(dim_row, dim_col), target_shape=(dim_row, dim_col, 1)))

        if self.nConv > 0:
            #model.add(Reshape((dim_row, dim_col, 1)))
            model.add(Reshape(input_shape=(dim_row, dim_col), target_shape=(dim_row, dim_col, 1)))
            for i in range(self.nConv):
                model.add(Convolution2D(self.conv_nodes, kernel_size = (self.kernel_size, 1), padding = 'same', kernel_regularizer = regularizers.l2(0.01)))
                model.add(Activation('relu'))
            model.add(Reshape(target_shape=(dim_row, self.conv_nodes * dim_col)))
        # Since our output has a single dimension, "return_sequences='True'" is not
        # needed, nor is TimeDistributed
        if self.nConv == 0:
            model.add(LSTM(units=self.lstm_nodes, return_sequences=True, activation='tanh', input_shape=(dim_row, dim_col)))
        for i in range(self.nLSTM - 1):
            model.add(LSTM(units=self.lstm_nodes, return_sequences=True, activation='tanh'))
        model.add(Dropout(0.5))
        model.add(TimeDistributed(Dense(units = output_dim))) # the dimension of index one will be considered to be the temporal dimension
        model.add(Activation('softmax'))  # for loss = 'categorical_crossentropy'
        #model.add(Activation('sigmoid'))  # for loss = 'binary_crossentropy'
        
        # applying x[:, -1, :] makes the second dimension disappear, keeping only
        # the LAST (-1) elements of that dimension:
        # Try this to see:
        # data = np.random.random((5, 3, 4))
        # print(data)
        # print(data[:, -1, :])
        
        model.add(Lambda(lambda x: x[:, -1, :], output_shape = [output_dim]))
        print(model.summary())
        
        tensorboard_active = False
        val_loss = False
        second_opinion = True
        callbacks = []
        if tensorboard_active:
            callbacks.append(TensorBoard(
                log_dir=self.putmodel + "Tensor_board_data",
                histogram_freq=0,
                write_graph=True,
                write_images=True))
        if val_loss:
            callbacks.append(EarlyStopping(
                monitor='val_loss', 
                patience=5))
        if second_opinion:
            callbacks.append(SecondOpinion(model, data_test, clas_test, 10))
        #model.compile(loss = 'categorical_crossentropy', optimizer='Adam', metrics = ['categorical_accuracy'])
        #model.compile(loss = 'binary_crossentropy', optimizer=Adam(lr=self.learning), metrics = ['categorical_accuracy'])
        model.compile(loss = 'categorical_crossentropy', optimizer='Adam', metrics = [class_1_accuracy])
                
        model.fit(x=data, 
                  y=clas,
                  batch_size=self.batch_size, epochs=800, verbose=2, 
                  callbacks = callbacks,
                  class_weight = class_weight)
                  #validation_data=(data_test, clas_test))
        
#####################################################################################################################
        
        # serialize model to YAML
        model_yaml = model.to_yaml()
        with open("model.yaml", "w") as yaml_file:
            yaml_file.write(model_yaml)
        # serialize weights to HDF5
        model.save_weights("model.h5")
        print("Saved model to disk")
        
#        # load YAML and create model
#        yaml_file = open('model.yaml', 'r')
#        loaded_model_yaml = yaml_file.read()
#        yaml_file.close()
#        loaded_model = model_from_yaml(loaded_model_yaml)
#        # load weights into new model
#        loaded_model.load_weights("model.h5")
#        print("Loaded model from disk")
#        loaded_model.compile(loss = 'categorical_crossentropy', optimizer='Adam', metrics = [class_1_accuracy])
#        
        print("Computing prediction ...")
        y_pred = model.predict_proba(data_test)
        
        model.reset_states()
        print("Computing train evaluation ...")
        score_train = model.evaluate(data, clas, verbose=2)
        print('Train loss:', score_train[0])
        print('Train accuracy:', score_train[1])

        model.reset_states()
#        score_train_loaded = loaded_model.evaluate(data, clas, verbose=2)
#        loaded_model.reset_states()
#        print('Train loss loaded:', score_train[0])
#        print('Train accuracy loaded:', score_train[1])

        print("Computing test evaluation ...")
        score_test = model.evaluate(data_test, clas_test, verbose=2)
        print('Test loss:', score_test[0])
        print('Test accuracy:', score_test[1])

        model.reset_states()
#        score_test_loaded = loaded_model.evaluate(data_test, clas_test, verbose=2)
#        loaded_model.reset_states()
#        print('Test loss loaded:', score_test[0])
#        print('Test accuracy loaded:', score_test[1])

        
        pred_T = 0
        pred_F = 0        
        for i in range(len(y_pred)):
            if np.argmax(y_pred[i]) == 1 and np.argmax(clas_test[i]) == 1:
                pred_T += 1
#                print(y_pred[i])
            if np.argmax(y_pred[i]) == 1 and np.argmax(clas_test[i]) != 1:
                pred_F += 1
        if pred_T + pred_F > 0:
            Pr_pos = pred_T/(pred_T + pred_F)
            print("Yoe Probabilidad pos: ", Pr_pos)
        else:
            print("Yoe Probabilidad pos: 0")
        
        history = DataFrame([[self.skip, self.nConv, self.nLSTM, 
                    self.learning, self.batch_size, 
                    self.conv_nodes, self.lstm_nodes, 
                    score_train[0], score_train[1], 
                    score_test[0], score_test[1]]], columns = ('Skip', 'cConv', 'nLSTM', 'learning', 
                                 'batch_size', 'conv_nodes', 'lstm_nodes', 
                                 'loss_train', 'acc_train', 'loss_test', 'acc_test'))
        self.history = self.history.append(history)
Ejemplo n.º 26
0
classifier.add(Dense(1, activation='sigmoid'))

#Compiling the NN
classifier.compile(optimizer='adam',
                   loss='binary_crossentropy',
                   metrics=['accuracy'])

#Fit the data
classifier.fit(x_train, y_train, batch_size=10, epochs=10)

# Predicting
y_pred = classifier.predict(x_test)
y_pred = np.round(y_pred)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

# Probability estimate of all classes (two classes) for the test data:
y_pred_prob = classifier.predict_proba(X_ts)

#CSV results
from pandas import DataFrame

column = ['Probability of belonging to class 1 - NN']

dic = dict(zip(column, [y_pred_prob.tolist()]))

df = DataFrame(dic)
export_csv = df.to_csv('NN.csv', columns=column)