Example #1
# 1. SEQUENTIAL API (the snippet is truncated in the source: the opening
# `model = Sequential([` and the earlier convolutional layers are missing)
 MaxPooling2D(pool_size=3, strides=2),

 Flatten(),

 Dense(4096, activation="relu"),
 Dense(4096, activation="relu"),
 Dense(1000, activation="softmax")
], name="CNN model")

model.summary()

for layer in model.layers:
    print(layer.name, layer.input_shape, layer.output_shape, layer.count_params())

print(model.name, model.input_shape, model.output_shape, model.count_params())

"""
If you have 10 filters that are 3 x 3 x 3 in one layer of a neural network,
how many parameters does that layer have?

Answer:

3 x 3 x 3 weights + 1 bias = 28 parameters  => 1 filter
28 x 10 = 280 parameters                    => 10 filters

Conv layer:   f(l) * f(l) * n_c(l-1) * n_c(l) + n_c(l)

Dense layer:  n_c(l) * n_c(l-1) + n_c(l)

Output size:  n(l) = floor((n(l-1) + 2*p(l) - f(l)) / s(l)) + 1
"""
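
# A quick check of the formula above (a minimal sketch, assuming tf.keras):
# a Conv2D layer with 10 filters of size 3x3 over a 3-channel input should
# have 3*3*3*10 + 10 = 280 parameters.
import tensorflow as tf

layer = tf.keras.layers.Conv2D(filters=10, kernel_size=3)
layer.build(input_shape=(None, 32, 32, 3))  # 3 input channels
assert layer.count_params() == 3 * 3 * 3 * 10 + 10  # 280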
Example #2
# Imports assumed by this example (the original snippet omits them):
import h5py
import numpy as np
from sklearn.metrics import (accuracy_score, explained_variance_score,
                             f1_score, mean_absolute_error,
                             mean_squared_error, r2_score)
from tensorflow.keras import backend as K
from tensorflow.keras import optimizers
from tensorflow.keras.models import Sequential


class NNmodel:
    def __init__(self,
                 Num_classes=3,
                 learning_rate=0.001,
                 batch_size=32,
                 decay=0,
                 epochs=100,
                 stateful_mode=False,
                 model=None,
                 regression=False,
                 **kwargs):

        self.Num_classes = Num_classes
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.decay = decay
        self.epochs = epochs
        self.stateful_mode = stateful_mode
        self.regression = regression
        if model is None:
            self.model = Sequential()
        else:
            self.init_model(model)

    def init_model(self, model):
        # compile the given model and initialise object attributes
        # (number of classes, task type) from its structure
        self.model = model
        self.Num_classes = int(self.model.layers[-1].output_shape[-1])
        if self.Num_classes == 1:
            self.regression = True
        self.model_compile()

    def validate_model_shape(self, x):

        if x.ndim == 2:
            x = np.expand_dims(x, axis=0)

        self._validate_input_shape(x)
        self._validate_output_shape()

    def reset_session(self):
        # clear the Keras session to release graph state between runs
        K.clear_session()

    def model_compile(self):
        # choose loss and metrics according to the task type
        if self.regression:
            self.model.compile(loss="mean_squared_error",
                               optimizer=optimizers.Adam(
                                   learning_rate=self.learning_rate,
                                   decay=self.decay),
                               metrics=['mae'])
        else:
            if self.Num_classes == 2:
                self.model.compile(
                    loss='binary_crossentropy',
                    optimizer=optimizers.Adam(
                        learning_rate=self.learning_rate,
                        decay=self.decay),
                    metrics=['accuracy'])
            else:
                self.model.compile(
                    loss='categorical_crossentropy',
                    optimizer=optimizers.Adam(
                        learning_rate=self.learning_rate,
                        decay=self.decay),
                    metrics=['accuracy'])

    def evaluate(self, data_list):
        '''
        Main evaluation function
        '''
        def _metric_compute(set_key,
                            display_name,
                            data=None,
                            data_generator=None):
            if self.regression:
                self._compute_metric_regression(data, data_generator, set_key,
                                                display_name, metric_dict)
            else:
                self._compute_metric_classification(data, data_generator,
                                                    set_key, display_name,
                                                    metric_dict)

        output_dict = {}
        output_dict['parameters'] = self.model.count_params()

        metric_dict = {}

        for da in data_list:

            # compute metrics for this dataset (train/validation/test)
            _metric_compute(**da)

        output_dict['metric'] = metric_dict

        return output_dict
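
    # Example of the `data_list` structure that `evaluate` expects
    # (hypothetical names; a `data_generator` entry may replace `data`):
    #
    #   nn.evaluate([
    #       {'set_key': 'train', 'display_name': 'Training set',
    #        'data': (X_train, y_train)},
    #       {'set_key': 'test', 'display_name': 'Test set',
    #        'data': (X_test, y_test)},
    #   ])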

    def _compute_metric_classification(self, data, data_generator, set_key,
                                       display_name, metric_dict):
        '''
        Evaluation helper for classification tasks: stores accuracy,
        weighted F1 score, and the confusion matrix for this dataset
        in metric_dict under set_key
        '''

        if self.stateful_mode:
            self.model.reset_states()

        if data:
            X, y = data
            if X is None:
                return
            y_pred = np.argmax(self.model.predict(X,
                                                  batch_size=self.batch_size),
                               axis=-1)

        else:
            data_generator.__reset_index__()
            y_pred = np.argmax(self.model.predict(x=data_generator), axis=-1)
            with h5py.File(data_generator.data, "r") as f:
                y = f["y_{}".format(data_generator.dataset)][:]
            data_generator.on_epoch_end()

        if y.ndim > 1:
            if y.shape[1] > 1:
                # y has been one hot encoded, decode it
                y = np.argmax(y, axis=-1)
            else:
                y = np.squeeze(y)

        accuracy = accuracy_score(y, y_pred)
        f1 = f1_score(y, y_pred, average='weighted')
        cm = self._compute_confusion_matrix(y, y_pred)

        metric_key = ['accuracy', 'f1_score', 'confusion_matrix']

        metric = {}
        metric["display_name"] = display_name
        metric[metric_key[0]] = accuracy
        metric[metric_key[1]] = f1
        metric[metric_key[2]] = cm.tolist()

        metric_dict[set_key] = metric

    def _compute_confusion_matrix(self, y, y_pred):
        # compute the confusion matrix by hand
        cm = np.zeros((self.Num_classes, self.Num_classes))
        for i in range(len(y)):
            cm[int(y[i]), int(y_pred[i])] += 1
        return cm
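
    # Note: this is equivalent to sklearn.metrics.confusion_matrix(
    # y, y_pred, labels=range(self.Num_classes)), computed by hand here.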

    def _compute_metric_regression(self, data, data_generator, set_key,
                                   display_name, metric_dict):
        '''
        Evaluation helper for regression tasks: stores MSE, MAE,
        explained variance, and the R^2 score for this dataset
        in metric_dict under set_key
        '''

        if self.stateful_mode:
            self.model.reset_states()

        if data:
            X, y = data
            # regression outputs are continuous: no argmax here
            y_pred = np.squeeze(
                self.model.predict(X, batch_size=self.batch_size))

        else:
            data_generator.__reset_index__()
            y_pred = np.squeeze(self.model.predict(x=data_generator))
            with h5py.File(data_generator.data, "r") as f:
                y = f["y_{}".format(data_generator.dataset)][:]
            data_generator.on_epoch_end()

        metric_key = [
            'mean_squared_error', 'mean_abs_error', 'variance', 'r2_score'
        ]

        mse = mean_squared_error(y, y_pred)
        mae = mean_absolute_error(y, y_pred)
        evs = explained_variance_score(y, y_pred)
        r2 = r2_score(y, y_pred)

        metric = {}
        metric["display_name"] = display_name
        metric[metric_key[0]] = mse
        metric[metric_key[1]] = mae
        metric[metric_key[2]] = evs
        metric[metric_key[3]] = r2

        metric_dict[set_key] = metric

    def predict(self, x):
        '''
        :param x: a numpy testing array
        :return: a numpy array of probability in shape [m, n], m: the number of testing samples; n: the number of classes
        '''

        return self.model.predict(x, batch_size=self.batch_size)

    def predict_class(self, x):
        '''
        :param x: a numpy testing array
        :return: a numpy array of labels in shape [m], m is the number of testing samples
        '''

        # predict_classes was removed from tf.keras; derive labels via argmax
        return np.argmax(self.model.predict(x, batch_size=self.batch_size),
                         axis=-1)

    def get_config(self):
        '''
        Print a summary of the model (note: model.summary() writes to
        stdout and returns None)
        '''

        return self.model.summary()

    def initialize(self):
        '''
        Re-initialize the weights of a trained model in place, skipping
        batch-normalization layers (relies on the TF1-style session API)

        :return: the re-initialized, re-compiled model
        '''
        session = K.get_session()

        for layer in self.model.layers:
            # leave batch-normalization layers untouched
            if 'batch_normalization' in layer.get_config()['name']:
                continue
            for v in layer.__dict__:
                v_arg = getattr(layer, v)
                if hasattr(v_arg, 'initializer'):
                    # re-run the variable's initializer op in the session
                    initializer_method = getattr(v_arg, 'initializer')
                    initializer_method.run(session=session)
                    print('reinitializing layer {}.{}'.format(
                        layer.name, v))

        self.model.summary()

        self.model.compile(loss=self.model.loss,
                           optimizer=self.model.optimizer)

        return self.model
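
# A hypothetical usage sketch of the NNmodel wrapper above (names like
# `pretrained_keras_model` and `X_test` are assumptions):
#
#   nn = NNmodel(learning_rate=3e-4)
#   nn.init_model(pretrained_keras_model)  # infers Num_classes and compiles
#   probs = nn.predict(X_test)
#   labels = nn.predict_class(X_test)
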
# 2. FUNCTIONAL API (the snippet is truncated in the source: the Input layer
# and the earlier convolutional layers are missing)
x = Conv2D(filters=384, kernel_size=3, strides=1, activation="relu")(x)
x = ZeroPadding2D(padding=1)(x)
x = Conv2D(filters=256, kernel_size=3, strides=1, activation="relu")(x)
x = MaxPooling2D(pool_size=3, strides=2)(x)
x = Flatten()(x)
x = Dense(4096, activation="relu")(x)
x = Dense(4096, activation="relu")(x)
outputs = Dense(1000, activation="softmax")(x)

# At this point, you can create a Model by specifying its inputs and
# outputs in the graph of layers
model_functional = Model(inputs=inputs, outputs=outputs, name="CNNmodel")

model_functional.summary()

assert model_functional.count_params() == model.count_params()
"""
3. SUBCLASS API
Where you implement everything from scratch on your own.
Use this if you have complex, out-of-the-box research use cases.
"""


class triBlockArchitecture(tf.keras.layers.Layer):
    def __init__(self, block=[True, True, True], f=1, k=1, p=1, s=1):
        # super().__init__() must run before attributes are set on a Layer
        super(triBlockArchitecture, self).__init__()
        self.block = block
        self.pad = ZeroPadding2D(p)
        self.conv = Conv2D(filters=f, kernel_size=k, strides=s,
                           activation="relu")
        # the source is truncated here; the pooling layer and call() below
        # are a plausible completion of the pad/conv/pool block
        self.pool = MaxPooling2D(pool_size=3, strides=2)

    def call(self, inputs):
        # pad -> conv -> pool, each stage gated by its flag in `block`
        x = self.pad(inputs) if self.block[0] else inputs
        x = self.conv(x) if self.block[1] else x
        return self.pool(x) if self.block[2] else x
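
# A quick usage sketch of the layer above (shapes are assumptions):
#
#   block = triBlockArchitecture(block=[True, True, True],
#                                f=64, k=3, p=1, s=1)
#   y = block(tf.random.normal((8, 32, 32, 3)))
#   print(y.shape)  # padding, conv, and pooling applied in sequence
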
class NNetworkLearning(NNetworkAbstract):
    def saveModel(self, file_name):
        if file_name:
            self.model.save(file_name, save_format='h5')
            return True
        else:
            return False

    def createModel(self):
        del self.model
        self.model = Sequential()
        self.model.add(
            Conv1D(64,
                   5,
                   data_format='channels_first',
                   input_shape=(settings.CHANNELS_NUM, settings.STEP),
                   activation='relu',
                   padding='same'))
        self.model.add(
            Conv1D(64,
                   5,
                   activation='relu',
                   padding='same',
                   data_format='channels_first'))
        self.model.add(MaxPooling1D(2, data_format='channels_first'))

        self.model.add(Dropout(0.25))
        self.model.add(
            Conv1D(128,
                   5,
                   activation='relu',
                   padding='same',
                   data_format='channels_first'))
        self.model.add(
            Conv1D(128,
                   5,
                   activation='relu',
                   padding='same',
                   data_format='channels_first'))
        self.model.add(MaxPooling1D(2, data_format='channels_first'))

        self.model.add(Dropout(0.25))
        self.model.add(Flatten())
        self.model.add(Dense(1000, activation='relu'))
        self.model.add(Dropout(0.5))
        self.model.add(Dense(len(settings.KEY_l), activation='sigmoid'))
        self.model.compile(loss='mse', optimizer="Adam", metrics=["mae"])
        self.model.summary()
        return self.model.count_params()

    def parseData(self, dir_name, form):
        if self.data:
            return False
        return ParserThread(dir_name, form)

    def getParsedData(self,
                      progress,
                      x_train_name,
                      y_train_name,
                      x_test_name=None,
                      y_test_name=None):
        if self.data:
            return False
        output = []
        x_data = self.getDataByFileName(x_train_name)
        progress.setValue(1)
        y_data = self.getDataByFileName(y_train_name)
        progress.setValue(2)
        if x_test_name and y_test_name:
            output.append(x_data)
            x_test = self.getDataByFileName(x_test_name)
            progress.setValue(3)
            output.append(y_data)
            output.append(x_test)
            y_test = self.getDataByFileName(y_test_name)
            progress.setValue(4)
            output.append(y_test)
            self.data = tuple(output)
            return True
        else:
            x_train, y_train, x_test, y_test = Parser.split_data(
                x_data, y_data)
            progress.setValue(4)
            self.data = (np.array(x_train), y_train, np.array(x_test), y_test)
            return True

    def getDataByFileName(self, file_name):
        return np.load(file_name)

    def saveParsedData(self, dir_name):
        if self.data is not None:
            np.save(dir_name + r'\x_train', self.data[0])
            np.save(dir_name + r'\y_train', self.data[1])
            np.save(dir_name + r'\x_test', self.data[2])
            np.save(dir_name + r'\y_test', self.data[3])

    def trainModel(self, form):
        if self.data and self.model:
            return TrainModelThread(self.data, self.model, form)

    def testModel(self, form):
        self.testOnData(self.data[0], self.data[1], form, 'train')
        self.testOnData(self.data[2], self.data[3], form, 'test')

    def score_errors(self, prediction, y, form, data_type):
        first_all = 0
        first_false = 0
        first_acc = 0
        first_max_acc = 0
        second_all = 0
        second_false = 0
        second_acc = 0
        second_max_acc = 0
        second_hd_avg = 0
        second_hd_max = 0
        second_hd_min = 1

        for i in range(len(prediction)):
            s = self.Hamming_distance(np.array(y[i]), np.array(prediction[i]))

            acc = (1 - s / len(settings.KEY_l))
            if (y[i] == np.array(settings.KEY_l)).all():
                first_all += 1
                first_acc += acc
                if acc > first_max_acc:
                    first_max_acc = acc
                if s > 0:
                    first_false += 1
            else:
                second_all += 1
                second_acc += acc
                if acc > second_max_acc:
                    second_max_acc = acc

                d = self.Hamming_distance(np.array(prediction[i]),
                                          np.array(settings.KEY_l)) / len(
                                              settings.KEY_l)
                second_hd_avg += d
                if d > second_hd_max:
                    second_hd_max = d
                if d < second_hd_min:
                    second_hd_min = d
                if d == 0:
                    second_false += 1

        first_error = first_false / first_all
        second_error = second_false / second_all
        first_acc_avg = first_acc / first_all
        second_acc_avg = second_acc / second_all
        second_hd_avg = second_hd_avg / second_all

        form_fields = form.getFormFields(data_type)
        if len(form_fields) != 0:
            form_fields[0].setText("{:d}".format(len(prediction)))
            form_fields[1].setText("{:d}".format(first_all))
            form_fields[2].setText("{:d}".format(first_false))
            form_fields[3].setText("{:.5f}".format(first_error))
            form_fields[4].setText("{:.5f}".format(first_acc_avg))
            form_fields[5].setText("{:d}".format(second_all))
            form_fields[6].setText("{:d}".format(second_false))
            form_fields[7].setText("{:.5f}".format(second_error))
            form_fields[8].setText("{:.5f}".format(second_hd_avg))

        print("Количество тестовых данных = ", len(prediction))
        print(
            'Полное количество "своих" = {a:d}, количество ложных срабатываний = {f:d}, отношение ложных срабатываний к всем "своим" = {e:.5f}, средняя точность = {a_a:.5f}, max_acc = {m_a:.5f}'
            .format(a=first_all,
                    f=first_false,
                    e=first_error,
                    a_a=first_acc_avg,
                    m_a=first_max_acc))
        print(
            'Полное количество "чужих" = {a:d}, количество ложных пропусков = {f:d}, отношение ложных пропусков ко всем "чужим" = {e:.5f}, среднее расстояние Хемминга = {hd_a:.5f}, max расстояние Хемминга = {hd_max:.5f}, min расстояние Хемминга = {hd_min:.5f}'
            .format(a=second_all,
                    f=second_false,
                    e=second_error,
                    hd_a=second_hd_avg,
                    hd_max=second_hd_max,
                    hd_min=second_hd_min))

    def testOnData(self, x, y, form, data_type):
        prediction = self.model.predict(x)
        self.score_errors(prediction, y, form, data_type)

    def Hamming_distance(self, x, y):
        # count coordinates where |x - y| exceeds half the modulus
        d = np.greater(np.abs(x - y),
                       [settings.MODUL / 2] * len(x)).astype('int')
        return np.sum(d)
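
    # Worked example (hypothetical values): with settings.MODUL = 256 the
    # threshold is 128, so for x = [0, 10, 200] and y = [0, 20, 60] the
    # elementwise |x - y| = [0, 10, 140] gives d = [0, 0, 1], distance 1.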