def daniel_first_results():
    # Load the average runtimes measured for each runtime environment
    linux_total = np.fromfile(
        "/var/lib/arhuaco/data/performance/daniel_no_hard_native.log",
        dtype=float, sep="\n")
    docker_total = np.fromfile(
        "/var/lib/arhuaco/data/performance/daniel_no_hard_docker.log",
        dtype=float, sep="\n")
    rkt_total = np.fromfile(
        "/var/lib/arhuaco/data/performance/daniel_no_hard_rkt.log",
        dtype=float, sep="\n")
    singul_total = np.fromfile(
        "/var/lib/arhuaco/data/performance/daniel_no_hard_sing.log",
        dtype=float, sep="\n")
    # Graphically plot the results
    plot = Plot()
    # Linux jobs vs Docker vs Rkt vs Singularity jobs
    plot.history2plot([linux_total, docker_total, rkt_total, singul_total],
                      ['Linux', 'Docker', 'Rkt', 'Singularity'],
                      "Stock Kernel",
                      "Number of simultaneous Jobs",
                      "Average runtime [s]",
                      "/var/lib/arhuaco/data/performance/performance-%s.pdf"
                      % time.strftime("%Y%m%d-%H%M%S"),
                      'lower right', [0.5, 10.5], [4200, 5800])
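# NOTE: `Plot` is imported from elsewhere in the package and its
# implementation is not part of this excerpt. As a hedged point of
# reference only, the hypothetical `PlotSketch` below reproduces the
# history2plot call signature used throughout this module with matplotlib:
# a list of series, matching legend labels, title, axis labels, an output
# path, a legend location, and optional axis limits.
import matplotlib
matplotlib.use("Agg")  # write PDFs without needing a display
import matplotlib.pyplot as plt

class PlotSketch:
    def history2plot(self, series, legends, title, xlabel, ylabel,
                     path=None, location='lower right',
                     xlim=None, ylim=None):
        fig, ax = plt.subplots()
        for data, label in zip(series, legends):
            ax.plot(data, label=label)
        ax.set_title(title)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        if xlim is not None:
            ax.set_xlim(xlim)
        if ylim is not None:
            ax.set_ylim(ylim)
        ax.legend(loc=location)
        if path is not None:
            fig.savefig(path)
        plt.close(fig)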
def my_results():
    # Load the total runtimes (file names fixed: linux/sysdig were swapped
    # relative to their variable names)
    linux_total = np.fromfile(
        "/var/lib/arhuaco/data/performance/linux_total_final.log",
        dtype=float, sep="\n")
    docker_total = np.fromfile(
        "/var/lib/arhuaco/data/performance/docker_total_final.log",
        dtype=float, sep="\n")
    sysdig_total = np.fromfile(
        "/var/lib/arhuaco/data/performance/sysdig_total_final.log",
        dtype=float, sep="\n")
    # Graphically plot the results
    plot = Plot()
    # Linux job vs Docker job vs Docker+Sysdig job. The totals are passed
    # as both the value and the error series here; my_second_results below
    # uses separate avg/std files.
    plot.history2error([linux_total, docker_total, sysdig_total],
                       [linux_total, docker_total, sysdig_total],
                       ['Linux', 'Docker', 'Arhuaco isolation and monitoring'],
                       "Performance test",
                       "Number of ALICE grid jobs in parallel",
                       "Average runtime [s]",
                       "/var/lib/arhuaco/data/performance/performance-%s.pdf"
                       % time.strftime("%Y%m%d-%H%M%S"),
                       'lower right', [1, 10], [0, 60000])
def analyze_syscalls():
    # SGD (linear SVM) prototype for system call classification
    # Parameters
    min_word_count = 5  # Minimum word count
    context = 10        # Context window size
    paths = ["/var/lib/arhuaco/data/normal_clean.csv",
             "/var/lib/arhuaco/data/malicious_clean.csv"]
    labels = [0, 1]
    number_samples = 2
    number_samples_w2v = 10000
    num_epochs = 10
    embedding_dim = 10
    # Model hyperparameters
    max_length = 7
    n_gram = 6
    # Create objects
    data_helpers = DataHelpers(paths, labels, max_length,
                               n_gram, number_samples)
    w2v = W2V()
    sentence_stream = data_helpers.sentence_stream(number_samples_w2v)
    params = w2v.train_word2vec_stream(sentence_stream,
                                       num_features=embedding_dim,
                                       min_word_count=min_word_count,
                                       context=context,
                                       num_epochs=num_epochs)
    # Create the model
    classes = np.array([0, 1])
    clf = SGDClassifier(loss='hinge', penalty="l2", eta0=0.01,
                        learning_rate='constant')
    # Data load
    data_generator = data_helpers.get_data_chunk(params[1])
    # Training the model
    train_accuracy = []
    test_accuracy = []
    # The first partial_fit call must declare the full class list
    x_train, y_train = next(data_generator)
    clf.partial_fit(x_train, y_train, classes=classes)
    for batch in range(num_epochs):
        # Accumulate 1000 chunks, then take one SGD step over the whole set
        for sample in range(1000):
            x_train_aux, y_train_aux = next(data_generator)
            # x_test_aux, y_test_aux = next(data_generator)
            x_train = np.concatenate([x_train, x_train_aux], 0)
            y_train = np.concatenate([y_train, y_train_aux], 0)
            # x_test = np.concatenate([x_test, x_test_aux], 0)
            # y_test = np.concatenate([y_test, y_test_aux], 0)
        clf.partial_fit(x_train, y_train)
        print("Batch: %d" % batch)
        print('Train Accuracy: %.3f' % clf.score(x_train, y_train))
        # print('Test Accuracy: %.3f' % clf.score(x_test, y_test))
        train_accuracy.append(clf.score(x_train, y_train))
        # test_accuracy.append(clf.score(x_test, y_test))
    # Plot the results
    plot = Plot()
    # Legend list added to match the history2plot signature used elsewhere
    plot.history2plot([train_accuracy], ['Training'],
                      "Model accuracy", "Epoch", "Accuracy")
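# Minimal, self-contained illustration (synthetic data, not the Arhuaco
# datasets) of the incremental-learning pattern used above: SGDClassifier
# requires the full class list on the first partial_fit call, after which
# batches can be fed as they are generated.
def _sgd_partial_fit_demo():
    import numpy as np
    from sklearn.linear_model import SGDClassifier
    rng = np.random.RandomState(0)
    clf = SGDClassifier(loss='hinge', penalty='l2')
    classes = np.array([0, 1])
    for batch in range(10):
        x = rng.randn(100, 5)
        y = (x[:, 0] > 0).astype(int)  # trivially separable labels
        if batch == 0:
            # classes must be declared up front for online learning
            clf.partial_fit(x, y, classes=classes)
        else:
            clf.partial_fit(x, y)
    print('Demo accuracy: %.3f' % clf.score(x, y))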
def training_vs_validation_cnn():
    # Load the CNN training histories from the log files
    sys_accuracy = np.fromfile(
        "/var/lib/arhuaco/data/logs/sys_accuracy_cnn.log",
        dtype=float, sep="\n")
    sys_val_accuracy = np.fromfile(
        "/var/lib/arhuaco/data/logs/sys_val_accuracy_cnn.log",
        dtype=float, sep="\n")
    sys_fpr = np.fromfile(
        "/var/lib/arhuaco/data/logs/sys_fpr_cnn.log",
        dtype=float, sep="\n")
    sys_val_fpr = np.fromfile(
        "/var/lib/arhuaco/data/logs/sys_val_fpr_cnn.log",
        dtype=float, sep="\n")
    net_accuracy = np.fromfile(
        "/var/lib/arhuaco/data/logs/net_accuracy_cnn.log",
        dtype=float, sep="\n")
    net_val_accuracy = np.fromfile(
        "/var/lib/arhuaco/data/logs/net_val_accuracy_cnn.log",
        dtype=float, sep="\n")
    net_fpr = np.fromfile(
        "/var/lib/arhuaco/data/logs/net_fpr_cnn.log",
        dtype=float, sep="\n")
    net_val_fpr = np.fromfile(
        "/var/lib/arhuaco/data/logs/net_val_fpr_cnn.log",
        dtype=float, sep="\n")
    # Graphically plot the results
    plot = Plot()
    # Training vs validation
    plot.history2plot([sys_accuracy, sys_val_accuracy],
                      ['Training', 'Validation'],
                      "System call classification with CNN",
                      "Epoch", "Accuracy",
                      "/var/lib/arhuaco/data/logs/sys_conv_accuracy-%s.pdf"
                      % time.strftime("%Y%m%d-%H%M%S"),
                      'lower right', [0, 9], [0.8, 1.0])
    plot.history2plot([sys_fpr, sys_val_fpr],
                      ['Training', 'Validation'],
                      "System call classification with CNN",
                      "Epoch", "False positive rate",
                      "/var/lib/arhuaco/data/logs/sys_conv_fpr-%s.pdf"
                      % time.strftime("%Y%m%d-%H%M%S"),
                      'upper left', [0, 9], [0, 0.2])
    plot.history2plot([net_accuracy, net_val_accuracy],
                      ['Training', 'Validation'],
                      "Network trace classification with CNN",
                      "Epoch", "Accuracy",
                      "/var/lib/arhuaco/data/logs/net_conv_accuracy-%s.pdf"
                      % time.strftime("%Y%m%d-%H%M%S"),
                      'lower right', [0, 9], [0.8, 1.0])
    plot.history2plot([net_fpr, net_val_fpr],
                      ['Training', 'Validation'],
                      "Network trace classification with CNN",
                      "Epoch", "False positive rate",
                      "/var/lib/arhuaco/data/logs/net_conv_fpr-%s.pdf"
                      % time.strftime("%Y%m%d-%H%M%S"),
                      'upper left', [0, 9], [0, 0.2])
def analyze_network():
    # SGD (linear SVM) prototype for DNS trace classification
    clf = SGDClassifier(loss='hinge', penalty="l2")
    # Word2Vec parameters, see train_word2vec
    min_word_count = 1  # Minimum word count
    context = 4         # Context window size
    paths = ["/var/lib/arhuaco/data/dns_normal.log",
             "/var/lib/arhuaco/data/dns_malicious.log"]
    labels = [0, 1]
    number_samples = 10
    num_epochs = 100
    embedding_dim = 5
    # Model hyperparameters
    max_length = 5
    n_gram = 1
    # Create objects
    data_helpers = DataHelpers(paths, labels, max_length,
                               n_gram, number_samples)
    w2v = W2V()
    # Load data
    print("Loading data...")
    x, y, vocabulary, vocabulary_inv = data_helpers.load_data()
    embedding_weights, vocabulary = w2v.train_word2vec(x, embedding_dim,
                                                       min_word_count,
                                                       context)
    classes = np.array([0, 1])
    # Data load
    data_generator = data_helpers.get_data_chunk(vocabulary)
    # Training the model
    train_accuracy = []
    test_accuracy = []
    for batch in range(num_epochs):
        x_train, y_train = next(data_generator)
        x_test, y_test = next(data_generator)
        clf.partial_fit(x_train, y_train, classes=classes)
        print("Batch: %d" % batch)
        print('Train Accuracy: %.3f' % clf.score(x_train, y_train))
        print('Test Accuracy: %.3f' % clf.score(x_test, y_test))
        train_accuracy.append(clf.score(x_train, y_train))
        test_accuracy.append(clf.score(x_test, y_test))
    # Plot the results
    plot = Plot()
    # Legend list added to match the history2plot signature used elsewhere
    plot.history2plot([train_accuracy, test_accuracy],
                      ['Training', 'Validation'],
                      "Model accuracy", "Epoch", "Accuracy")
def my_second_results():
    # Average runtimes
    linux_total_avg = np.fromfile(
        "/var/lib/arhuaco/data/performance/linux_avg_final.log",
        dtype=float, sep="\n")
    docker_total_avg = np.fromfile(
        "/var/lib/arhuaco/data/performance/docker_avg_final.log",
        dtype=float, sep="\n")
    sysdig_total_avg = np.fromfile(
        "/var/lib/arhuaco/data/performance/sysdig_avg_final.log",
        dtype=float, sep="\n")
    # Standard deviations
    linux_total_std = np.fromfile(
        "/var/lib/arhuaco/data/performance/linux_std_final.log",
        dtype=float, sep="\n")
    docker_total_std = np.fromfile(
        "/var/lib/arhuaco/data/performance/docker_std_final.log",
        dtype=float, sep="\n")
    sysdig_total_std = np.fromfile(
        "/var/lib/arhuaco/data/performance/sysdig_std_final.log",
        dtype=float, sep="\n")
    # Graphically plot the results
    plot = Plot()
    # Linux job vs Docker job vs Docker+Sysdig job
    plot.history2error([linux_total_avg, docker_total_avg, sysdig_total_avg],
                       [linux_total_std, docker_total_std, sysdig_total_std],
                       ['Linux', 'Docker', 'Arhuaco isolation and monitoring'],
                       "Performance test",
                       "Number of ALICE grid jobs in parallel",
                       "Average runtime [s]",
                       "/var/lib/arhuaco/data/performance/performance-%s.pdf"
                       % time.strftime("%Y%m%d-%H%M%S"),
                       'lower right', [0.5, 10.5], [4200, 5800])
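# NOTE: like `Plot.history2plot`, `history2error` is defined elsewhere in
# the package. A hypothetical matplotlib sketch consistent with the call
# sites in this module: one error-bar series per entry, pairing each
# average curve with its standard deviation.
class PlotErrorSketch:
    def history2error(self, averages, deviations, legends, title,
                      xlabel, ylabel, path, location='lower right',
                      xlim=None, ylim=None):
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots()
        for avg, std, label in zip(averages, deviations, legends):
            jobs = range(1, len(avg) + 1)  # 1..N parallel jobs on the x axis
            ax.errorbar(jobs, avg, yerr=std, label=label, capsize=3)
        ax.set_title(title)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        if xlim is not None:
            ax.set_xlim(xlim)
        if ylim is not None:
            ax.set_ylim(ylim)
        ax.legend(loc=location)
        fig.savefig(path)
        plt.close(fig)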
def comparative_results():
    # Load the validation histories of the CNN and SVM models
    sys_val_accuracy_cnn = np.fromfile(
        "/var/lib/arhuaco/data/logs/sys_val_accuracy_cnn.log",
        dtype=float, sep="\n")
    sys_val_accuracy_svm = np.fromfile(
        "/var/lib/arhuaco/data/logs/sys_val_accuracy_svm.log",
        dtype=float, sep="\n")
    sys_val_fpr_cnn = np.fromfile(
        "/var/lib/arhuaco/data/logs/sys_val_fpr_cnn.log",
        dtype=float, sep="\n")
    sys_val_fpr_svm = np.fromfile(
        "/var/lib/arhuaco/data/logs/sys_val_fpr_svm.log",
        dtype=float, sep="\n")
    net_val_accuracy_cnn = np.fromfile(
        "/var/lib/arhuaco/data/logs/net_val_accuracy_cnn.log",
        dtype=float, sep="\n")
    net_val_accuracy_svm = np.fromfile(
        "/var/lib/arhuaco/data/logs/net_val_accuracy_svm.log",
        dtype=float, sep="\n")
    net_val_fpr_cnn = np.fromfile(
        "/var/lib/arhuaco/data/logs/net_val_fpr_cnn.log",
        dtype=float, sep="\n")
    net_val_fpr_svm = np.fromfile(
        "/var/lib/arhuaco/data/logs/net_val_fpr_svm.log",
        dtype=float, sep="\n")
    net_val_acc_gen_svm = np.fromfile(
        "/var/lib/arhuaco/data/logs/net_val_acc_gen_svm.log",
        dtype=float, sep="\n")
    # Graphically plot the results
    plot = Plot()
    # Syscall CNN vs SVM accuracy
    plot.history2plot([sys_val_accuracy_cnn[0:10], sys_val_accuracy_svm[0:10]],
                      ['CNN validation', 'SVM validation'],
                      "CNN vs SVM system call validation accuracy",
                      "Epoch", "Accuracy",
                      "/var/lib/arhuaco/data/logs/sys_cnn_svm_accuracy-%s.pdf"
                      % time.strftime("%Y%m%d-%H%M%S"),
                      'lower right', [0, 9], [0, 0.2])
    # Syscall CNN vs SVM false positive rate
    plot.history2plot([sys_val_fpr_cnn[0:10], sys_val_fpr_svm[0:10]],
                      ['CNN validation', 'SVM validation'],
                      "CNN vs SVM system call validation false positive rate",
                      "Epoch", "False positive rate",
                      "/var/lib/arhuaco/data/logs/sys_cnn_svm_fpr-%s.pdf"
                      % time.strftime("%Y%m%d-%H%M%S"),
                      'upper left', [0, 9], [0, 0.2])
    # Network CNN vs SVM accuracy
    plot.history2plot([net_val_accuracy_cnn[0:10], net_val_accuracy_svm[0:10]],
                      ['CNN validation', 'SVM validation'],
                      "CNN vs SVM network trace validation accuracy",
                      "Epoch", "Accuracy",
                      "/var/lib/arhuaco/data/logs/net_cnn_svm_accuracy-%s.pdf"
                      % time.strftime("%Y%m%d-%H%M%S"),
                      'lower right', [0, 9], [0, 0.2])
    # Network CNN vs SVM false positive rate
    plot.history2plot([net_val_fpr_cnn[0:10], net_val_fpr_svm[0:10]],
                      ['CNN validation', 'SVM validation'],
                      "CNN vs SVM network validation false positive rate",
                      "Epoch", "False positive rate",
                      "/var/lib/arhuaco/data/logs/net_cnn_svm_fpr-%s.pdf"
                      % time.strftime("%Y%m%d-%H%M%S"),
                      'upper left', [0, 9], [0, 0.2])
    # Network SVM, original vs generated training data (accuracy; the
    # y-axis label previously said "False positive rate" by mistake)
    plot.history2plot([net_val_accuracy_svm[0:10], net_val_acc_gen_svm[0:10]],
                      ['SVM validation non-generated',
                       'SVM validation generated'],
                      "SVM accuracy comparison: normal data vs generated data",
                      "Epoch", "Accuracy",
                      "/var/lib/arhuaco/data/logs/net_svm_accuracy-generated-%s.pdf"
                      % time.strftime("%Y%m%d-%H%M%S"),
                      'upper left', [0, 9], [0, 0.2])
def analyze_syscalls():
    # SVM-based analysis of system call traces. Note: this redefines
    # analyze_syscalls above; if both are kept in one module, the later
    # definition wins.
    # Parameters
    seed = 5
    verbose = 2
    # Model hyperparameters
    # Max length of one sentence
    max_length = 7
    # Number of lines included in the series
    n_gram = 6
    # Total length of the classification object
    sequence_length = max_length * n_gram
    # Size of the vector representing each word
    embedding_dim = 20
    dropout_prob = (0.0, 0.0)
    # Number of neurons in the hidden layer
    hidden_dims = 20
    # Training parameters
    number_samples = 5
    samples_per_epoch = 10000
    num_epochs = 100
    val_split = 0.1
    # Word2Vec parameters, see train_word2vec
    # Minimum word count
    min_word_count = 6
    # Number of words that make sense in the context
    context = 10
    weights_file_svm = ("/var/lib/arhuaco/data/models/sys_W_svm-%s"
                        % time.strftime("%Y%m%d-%H%M%S"))
    model_file_svm = ("/var/lib/arhuaco/data/models/sys_model_svm-%s.json"
                      % time.strftime("%Y%m%d-%H%M%S"))
    # Training dataset
    paths = ["/var/lib/arhuaco/data/normal_clean.csv",
             "/var/lib/arhuaco/data/malicious_clean.csv"]
    # Training labels
    labels_svm = [-1, 1]
    # Create objects
    data_helpers = DataHelpers(paths, None, max_length, n_gram,
                               number_samples, seed)
    w2v = W2V()
    sentence_stream = data_helpers.sentence_stream(samples_per_epoch)
    params = w2v.train_word2vec_stream(sentence_stream,
                                       num_features=embedding_dim,
                                       min_word_count=min_word_count,
                                       context=context,
                                       num_epochs=num_epochs)
    svm = SVM(seed, sequence_length, embedding_dim, dropout_prob,
              hidden_dims, number_samples, num_epochs, val_split,
              min_word_count, context, weights_file_svm, model_file_svm,
              paths, None, data_helpers, verbose)
    svm.get_data(params[0], params[1], params[2])
    svm.build_model()
    print("SVM syscall training")
    history_svm = svm.train_model(samples_per_epoch, labels_svm)
    result = svm.test_model(10000, labels_svm, max_length, n_gram)
    # Graphically plot the results
    plot = Plot()
    # Training vs validation accuracy
    plot.history2plot([history_svm.history['real_accuracy'],
                       history_svm.history['val_real_accuracy']],
                      ['Training', 'Validation'],
                      "SVM accuracy", "Epoch", "Accuracy",
                      "/var/lib/arhuaco/data/models/sys_svm_accuracy-%s.pdf"
                      % time.strftime("%Y%m%d-%H%M%S"),
                      location='lower right')
    # Training vs validation false positive rate
    plot.history2plot([history_svm.history['false_pos_rate'],
                       history_svm.history['val_false_pos_rate']],
                      ['Training', 'Validation'],
                      "SVM false positive rate", "Epoch",
                      "False positive rate",
                      "/var/lib/arhuaco/data/models/sys_svm_fpr-%s.pdf"
                      % time.strftime("%Y%m%d-%H%M%S"),
                      location='upper right')
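# NOTE: `W2V.train_word2vec_stream` is implemented elsewhere; judging by
# its parameters it plausibly wraps gensim's Word2Vec. The hypothetical
# sketch below shows one way such a wrapper could produce the
# (embedding_weights, vocabulary, vocabulary_index) triple unpacked from
# `params` above (gensim >= 4 API; older versions used size=/iter=).
def train_word2vec_stream_sketch(sentences, num_features=20,
                                 min_word_count=6, context=10,
                                 num_epochs=100):
    from gensim.models import Word2Vec
    sentences = list(sentences)  # gensim iterates the corpus several times
    model = Word2Vec(sentences, vector_size=num_features,
                     min_count=min_word_count, window=context,
                     epochs=num_epochs)
    embedding_weights = model.wv.vectors      # index -> vector matrix
    vocabulary_index = model.wv.index_to_key  # index -> token
    vocabulary = {token: idx                  # token -> index
                  for idx, token in enumerate(vocabulary_index)}
    return embedding_weights, vocabulary, vocabulary_index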
def train(self, type="syscall"):
    # Load configuration
    config_object = Configuration()
    if type == "syscall":
        config_object.load_configuration("host")
        configuration = config_object.default_config
        # Training parameters
        configuration['verbose'] = 2
        configuration['samples_per_batch'] = 5
        configuration['samples_per_epoch'] = 100000
        configuration['num_epochs'] = 10
        configuration['val_split'] = 0.1
        configuration['weights_file_svm'] = (
            "/var/lib/arhuaco/data/models/sys_W_svm-%s"
            % time.strftime("%Y%m%d-%H%M%S"))
        configuration['model_file_svm'] = (
            "/var/lib/arhuaco/data/models/sys_model_svm-%s.json"
            % time.strftime("%Y%m%d-%H%M%S"))
        # Training dataset
        configuration['paths'] = [
            "/var/lib/arhuaco/data/normal_clean_filtered.csv",
            "/var/lib/arhuaco/data/malicious_clean_filtered.csv"]
        configuration['pdf_paths'] = [
            "/var/lib/arhuaco/data/models/sys_svm_accuracy-%s.pdf"
            % time.strftime("%Y%m%d-%H%M%S"),
            "/var/lib/arhuaco/data/models/sys_svm_fpr-%s.pdf"
            % time.strftime("%Y%m%d-%H%M%S")]
    elif type == "network":
        # Load configuration
        config_object.load_configuration("network")
        configuration = config_object.default_config
        # Training parameters
        configuration['verbose'] = 2
        configuration['samples_per_batch'] = 5
        configuration['samples_per_epoch'] = 1000
        configuration['num_epochs'] = 10
        configuration['val_split'] = 0.1
        configuration['weights_file_svm'] = (
            "/var/lib/arhuaco/data/models/net_W_svm-%s"
            % time.strftime("%Y%m%d-%H%M%S"))
        configuration['model_file_svm'] = (
            "/var/lib/arhuaco/data/models/net_model_svm-%s.json"
            % time.strftime("%Y%m%d-%H%M%S"))
        # Training dataset
        configuration['paths'] = [
            "/var/lib/arhuaco/data/dns_normal.log",
            "/var/lib/arhuaco/data/dns_malicious.log"]
            # "/var/lib/arhuaco/data/dns_malicious_generated.log"]
        configuration['pdf_paths'] = [
            "/var/lib/arhuaco/data/models/net_svm_accuracy-%s.pdf"
            % time.strftime("%Y%m%d-%H%M%S"),
            "/var/lib/arhuaco/data/models/net_svm_fpr-%s.pdf"
            % time.strftime("%Y%m%d-%H%M%S")]
    # Create objects
    # First create the sources of data
    data_helper = DataHelpers(
        data_source=configuration['paths'],
        label=None,
        tokens_per_line=configuration['tokens_per_line'],
        number_lines=configuration['number_lines'],
        samples_per_batch=configuration['samples_per_batch'],
        seed=configuration['seed'])
    # Apply the word2vec processing
    w2v = W2V()
    sentence_stream = data_helper.sentence_stream(
        configuration['samples_per_epoch'])
    params = w2v.train_word2vec_stream(
        sentence_stream,
        num_features=configuration['embedding_dim'],
        min_word_count=configuration['min_word_count'],
        context=configuration['context'],
        num_epochs=configuration['num_epochs'])
    embedding_weights = params[0]
    vocabulary = params[1]
    vocabulary_index = params[2]
    # Create the SVM network object
    svm_bow = SVM(seed=configuration['seed'],
                  samples_per_batch=configuration['samples_per_batch'],
                  min_word_count=configuration['min_word_count'],
                  context=configuration['context'],
                  weights_file=configuration['weights_file_svm'],
                  model_file=configuration['model_file_svm'],
                  labels=None,
                  verbose=configuration['verbose'])
    svm_bow.set_bow_params(embedding_weights=embedding_weights,
                           vocabulary=vocabulary,
                           vocabulary_index=vocabulary_index)
    # Build the model
    svm_bow.build_model(
        learn_rate=configuration['learn_rate'],
        momentum=configuration['momentum'],
        decay=configuration['decay'],
        nesterov=configuration['nesterov'],
        regularizer_param=configuration['regularizer_param'],
        dropout_rate=configuration['dropout_prob'],
        embedding_dim=configuration['embedding_dim'])
    print("SVM training")
    # Get the data sources
    training_generator = data_helper.get_data_BoW_chunk(
        vocabulary, configuration['labels_svm'])
    validation_generator = data_helper.get_data_BoW_chunk(
        vocabulary, configuration['labels_svm'])
    # Train and validate the model
    history_object = svm_bow.train_model(
        training_source=training_generator,
        validation_source=validation_generator,
        samples_per_epoch=configuration['samples_per_epoch'],
        number_epochs=configuration['num_epochs'],
        val_split=configuration['val_split'])
    # Test the model on a fresh data source built from a different seed
    # (the malicious path is pointed at the original DNS log)
    configuration['paths'][1] = '/var/lib/arhuaco/data/dns_malicious.log'
    configuration['samples_per_epoch'] = 1000
    validation_data_helper = DataHelpers(
        data_source=configuration['paths'],
        label=None,
        tokens_per_line=configuration['tokens_per_line'],
        number_lines=configuration['number_lines'],
        samples_per_batch=configuration['samples_per_batch'],
        seed=configuration['seed'] + 3)
    test_generator = validation_data_helper.get_data_BoW_chunk(
        vocabulary, configuration['labels_svm'])
    result = svm_bow.test_model(
        test_data_source=test_generator,
        samples_to_test=configuration['samples_per_epoch'])
    # Graphically plot the results
    plot = Plot()
    # Training vs validation accuracy
    plot.history2plot([history_object.history['real_accuracy'],
                       history_object.history['val_real_accuracy']],
                      ['Training', 'Validation'],
                      "SVM accuracy", "Epoch", "Accuracy",
                      configuration['pdf_paths'][0],
                      'lower right', [0, 9], [0.8, 1.0])
    # Training vs validation false positive rate
    plot.history2plot([history_object.history['false_pos_rate'],
                       history_object.history['val_false_pos_rate']],
                      ['Training', 'Validation'],
                      "SVM false positive rate", "Epoch",
                      "False positive rate",
                      configuration['pdf_paths'][1],
                      'upper right', [0, 9], [0, 0.2])
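# Usage sketch: the class enclosing train() is not part of this excerpt,
# so `SVMTrainer` below is a hypothetical name. The method is invoked once
# per data source, e.g.:
#
#   trainer = SVMTrainer()
#   trainer.train(type="syscall")   # host system-call model
#   trainer.train(type="network")   # DNS network-trace model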
def analyze_network():
    # CNN-based analysis of DNS network traces. Note: this redefines
    # analyze_network above; if both are kept in one module, the later
    # definition wins.
    # Parameters
    seed = 5
    model_variation = 'CNN-non-static'
    # Model hyperparameters
    # Max length of one sentence
    max_length = 5
    # Number of lines included in the series
    n_gram = 1
    # Total length of the classification object
    sequence_length = max_length * n_gram
    # Size of the vector representing each word
    embedding_dim = 10
    # Convolutional filters applied to the text
    filter_sizes = (2, 3)
    # Total filters used
    num_filters = 3
    dropout_prob = (0.0, 0.0)
    # Number of neurons in the hidden layer
    hidden_dims = 10
    # Training parameters
    number_samples = 5
    samples_per_epoch = 1000
    num_epochs = 100
    val_split = 0.1
    verbose = 2
    # Word2Vec parameters, see train_word2vec
    # Minimum word count
    min_word_count = 1
    # Number of words that make sense in the context
    context = 4
    weights_file_conv = ("/var/lib/arhuaco/data/models/net_W_conv-%s"
                         % time.strftime("%Y%m%d-%H%M%S"))
    model_file_conv = ("/var/lib/arhuaco/data/models/net_model_conv-%s.json"
                       % time.strftime("%Y%m%d-%H%M%S"))
    # Training dataset: train on the generated malicious data, then test on
    # the original malicious log (see the path swap before test_model below)
    paths = ["/var/lib/arhuaco/data/dns_normal.log",
             # "/var/lib/arhuaco/data/dns_malicious.log"]
             "/var/lib/arhuaco/data/dns_malicious_generated.log"]
    # Training labels
    labels_conv = [0, 1]
    # Create objects
    data_helpers = DataHelpers(paths, None, max_length, n_gram,
                               number_samples, seed)
    w2v = W2V()
    sentence_stream = data_helpers.sentence_stream(samples_per_epoch)
    params = w2v.train_word2vec_stream(sentence_stream,
                                       num_features=embedding_dim,
                                       min_word_count=min_word_count,
                                       context=context,
                                       num_epochs=num_epochs)
    cnn_w2v = CnnW2v(seed, model_variation, sequence_length, embedding_dim,
                     filter_sizes, num_filters, dropout_prob, hidden_dims,
                     number_samples, num_epochs, val_split, min_word_count,
                     context, weights_file_conv, model_file_conv, paths,
                     None, data_helpers, verbose)
    cnn_w2v.get_data(params[0], params[1], params[2])
    cnn_w2v.build_model()
    print("Convolutional network training")
    history_conv = cnn_w2v.train_model(samples_per_epoch, labels_conv)
    cnn_w2v.paths[1] = "/var/lib/arhuaco/data/dns_malicious.log"
    result = cnn_w2v.test_model(1000, labels_conv, max_length, n_gram)
    # Graphically plot the results
    plot = Plot()
    # Training vs validation accuracy
    plot.history2plot([history_conv.history['real_accuracy'],
                       history_conv.history['val_real_accuracy']],
                      ['Training', 'Validation'],
                      "CNN accuracy", "Epoch", "Accuracy",
                      "/var/lib/arhuaco/data/models/net_cnn_accuracy-%s.pdf"
                      % time.strftime("%Y%m%d-%H%M%S"),
                      location='lower right')
    # Training vs validation false positive rate
    plot.history2plot([history_conv.history['false_pos_rate'],
                       history_conv.history['val_false_pos_rate']],
                      ['Training', 'Validation'],
                      "CNN false positive rate", "Epoch",
                      "False positive rate",
                      "/var/lib/arhuaco/data/models/net_cnn_fpr-%s.pdf"
                      % time.strftime("%Y%m%d-%H%M%S"),
                      location='upper right')
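# NOTE: the model/weights file pairs written by these functions follow the
# Keras to_json()/save_weights() convention (an assumption; CnnW2v and SVM
# are defined elsewhere). If that holds, a trained model can be restored
# for offline inference along these lines:
def load_trained_model_sketch(model_file, weights_file):
    from keras.models import model_from_json  # or tensorflow.keras.models
    with open(model_file) as handle:
        model = model_from_json(handle.read())
    model.load_weights(weights_file)
    return model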