Exemple #1
0
    labels = np.array(labels)

    pca = Pca()
    data = preprocessing.scale(inputs)

    pcaAdapt = PcaAdapt(13)
    pcaAdapt.train(data)

    result = np.matrix.transpose(pcaAdapt.pca_result(data)).reshape(
        len(data), number_final_att)

    mlp = MLP(3)

    points = result
    inputs = points
    mlp.create_network(inputs.shape[1:], 0.001)
    mlp.train(inputs, labels, num_epochs, batch_size)
    '''  
    data, target = read_file()
    data = preprocessing.scale(data)

    pca = Pca()
    cov = pca.cov_matrix(data[:, 0], data[:, 1], data[:, 2], data[:, 3])

    values, vectors = pca.eigen_values_vectors(cov)
    values, vectors = pca.sort_eigen(values, vectors)

    vectors = pca.eigen_strip_vectors(values, vectors, 0.90)

    print(vectors)
    values = values[:len(vectors[0])]
class Classifier:
    """MLP classifier for detecting OpenSSL version.

    The TensorFlow graph (placeholders, network, loss, optimizer step and
    accuracy op) is built once in ``__init__``.  ``train`` fits the network
    and writes a checkpoint; ``test`` restores that checkpoint and reports
    the accuracy on a separate file.
    """

    # Checkpoint location shared by train() (writer) and test() (reader).
    _MODEL_PATH = './model.ckpt'

    def __init__(self,
                 num_input,
                 num_units,
                 num_classes,
                 batch_size=100,
                 num_epochs=10,
                 display=False,
                 blacklist=None,
                 whitelist=None):
        """Creates classifier for finding the version.

        Args:
            num_input: Width of the ``X`` placeholder (features per sample).
            num_units: Second entry of the size list handed to ``MLP``
                (presumably the hidden layer width -- confirm against MLP).
            num_classes: Third entry of the size list handed to ``MLP``
                (presumably the number of output logits).
            batch_size: Number of samples per optimizer step in ``train``.
            num_epochs: Number of passes over the training data.
            display: When true, ``self.print`` writes progress to stdout.
            blacklist: Forwarded unchanged to ``grab_data``; defaults to an
                empty list.  Mutually exclusive with ``whitelist``.
            whitelist: Forwarded unchanged to ``grab_data``; defaults to an
                empty list.  Mutually exclusive with ``blacklist``.

        Raises:
            AssertionError: If both ``blacklist`` and ``whitelist`` are
                non-empty.
        """

        # Network parameters
        self.l_rate = 0.001
        # Fed into the keep_prob placeholder while training.
        # NOTE(review): 1 presumably means "keep everything" (no dropout);
        # depends on how MLP.create_network uses it.
        self.dropout_prob = 1
        self.training_epochs = num_epochs
        self.display_step = 10
        self.batch_size = batch_size
        self.display = display

        # None sentinels instead of ``[]`` defaults so that instances never
        # share (and accidentally mutate) one module-level list.
        self.blacklist = blacklist if blacklist is not None else []
        self.whitelist = whitelist if whitelist is not None else []

        assert not (self.blacklist and self.whitelist), (
            'Both whitelist and blacklist are defined')

        # Placeholders
        self.X = tf.placeholder('float', [None, num_input], name='X')
        self.Y = tf.placeholder('int64', [None], name='Y')
        self.keep_prob = tf.placeholder('float')

        # Create Network
        self.mlp = MLP([num_input, num_units, num_classes],
                       [tf.nn.relu, tf.identity])

        logits = self.mlp.create_network(self.X, self.keep_prob)

        # Keyword arguments: accepted by both the old (logits, labels)
        # signature and the TF >= 1.0 signature, which rejects positionals.
        self.cost = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=self.Y)
        # self.optimizer ends up holding the minimize *op* that train() runs,
        # not the AdamOptimizer object itself.
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.l_rate)
        self.optimizer = self.optimizer.minimize(self.cost)

        # for evaluation
        predictions = tf.equal(tf.argmax(logits, 1), self.Y)
        self.accuracy = tf.reduce_mean(tf.cast(predictions, 'float'))

        self.init_op = tf.initialize_all_variables()
        self.saver = tf.train.Saver()

        # for gpu
        self.config = tf.ConfigProto(log_device_placement=False)

    @staticmethod
    def _batch_bounds(training_size, batch_size):
        """Yield ``(start, stop)`` slice bounds covering every sample once.

        ``stop`` is exclusive and clamped to ``training_size`` so the final,
        possibly shorter, batch still includes the last sample.
        """
        for start in range(0, training_size, batch_size):
            yield start, min(start + batch_size, training_size)

    def train(self, train_file=''):
        """Trains classifier
        Training file must be csv formatted

        Runs ``self.training_epochs`` passes of mini-batch optimisation over
        the data returned by ``grab_data`` and saves the resulting variables
        to ``./model.ckpt``.

        Raises:
            AssertionError: If ``batch_size`` is not smaller than the number
                of training samples.
        """

        trX, trY = grab_data(train_file, self.blacklist, self.whitelist)
        training_size = len(trX)

        assert self.batch_size < training_size, (
            'batch size is larger than training_size')

        with tf.Session(config=self.config) as sess:
            sess.run(self.init_op)

            for epoch in range(self.training_epochs):
                avg_cost = 0
                for lower, upper in self._batch_bounds(training_size,
                                                       self.batch_size):
                    feed_dict = {
                        self.X: np.atleast_2d(trX[lower:upper]),
                        self.Y: np.atleast_1d(trY[lower:upper]),
                        self.keep_prob: self.dropout_prob
                    }
                    _, c = sess.run([self.optimizer, self.cost],
                                    feed_dict=feed_dict)

                    # The cost op yields one loss per example in the batch;
                    # sum all of them so that, over the epoch, avg_cost is
                    # the mean per-sample loss (not just the first sample's).
                    avg_cost += np.sum(c) / training_size

                if epoch % self.display_step == 0:
                    self.print(('Epoch: {0:03} with '
                                'cost={1:.9f}').format(epoch + 1, avg_cost))

            self.print('Optimization Finished')

            # save model
            save_path = self.saver.save(sess, self._MODEL_PATH)
            self.print('Model saved in file: {}'.format(save_path))

    def test(self, test_file=''):
        """Tests classifier
        Testing file must be csv formatted

        Restores the variables saved by ``train`` from ``./model.ckpt`` and
        evaluates the accuracy op on the whole test set in one run.

        Returns:
            The accuracy as a percentage (accuracy op result times 100).
        """

        teX, teY = grab_data(test_file, self.blacklist, self.whitelist)

        with tf.Session(config=self.config) as sess:
            self.saver.restore(sess, self._MODEL_PATH)

            _accuracy = sess.run(self.accuracy,
                                 feed_dict={
                                     self.X: teX,
                                     self.Y: teY,
                                     # keep_prob of 1.0 during evaluation
                                     self.keep_prob: 1.0
                                 })
            _accuracy *= 100

            self.print('accuracy={}'.format(_accuracy))

            return _accuracy

    def print(self, val):
        """Write ``val`` to stdout, but only when ``self.display`` is set."""
        if self.display:
            print(val)