Example #1
import numpy as np
from read_dataset import read_data

# load inputs and labels from the dataset
X, y_array = read_data('./dataset/dataset.csv')

# randomly initialize the trainable parameters
W = [np.random.rand(), np.random.rand()]
b = np.random.rand()

learning_rate = 0.01
min_error_threshold = 0.5
# 75% / 25% train-test split sizes
train = int(3 / 4 * len(X))
test = len(X) - train


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def derivative_sigmoid(x, w, b):
    wx_plus_b = w * x + b
    return x * sigmoid(wx_plus_b) * (1 - sigmoid(wx_plus_b))


def calculate_y(W, X, b):
    # reshape returns a new array (it does not modify in place), so the
    # result must be kept and actually used in the matrix product
    w2 = np.array(W).reshape([2, 1])
    x2 = np.array(X)
    return np.matmul(x2, w2).flatten() + b
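
# The example stops after defining its helpers, so the loop below is a sketch
# of how they would plausibly be combined: gradient descent on a squared-error
# loss for the two-feature sigmoid model, run on the training slice. The epoch
# count and the use of min_error_threshold as a stopping rule are assumptions.
X_train, y_train = np.array(X[:train]), np.array(y_array[:train])
for epoch in range(1000):
    y_pred = sigmoid(calculate_y(W, X_train, b))
    error = y_pred - y_train
    grad = error * y_pred * (1 - y_pred)  # chain rule through the sigmoid
    for j in range(len(W)):
        W[j] -= learning_rate * np.mean(grad * X_train[:, j])
    b -= learning_rate * np.mean(grad)
    if np.mean(np.abs(error)) < min_error_threshold:
        break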
Example #2
'''
Building a malware analysis tool based on automatic feature selection.
Automatic feature selection: automatic feature weighting and selection
based on the statistical properties of the training set, where features
are ranked by their significance.
'''
import features_selection
import read_dataset
from sklearn.model_selection import train_test_split
import train
import time

# reads the dataset and classifies its content as {malware|not malware}
# returns x = {file|features' occurrences} and y = {labels}
x, y = read_dataset.read_data()

# select features from the already classified dataset
# to be used to train our model
print('\nFeatures Selection based on KBest: ')
features_selection.select_features_k_best(x, y)

print('\nFeatures Selection based on Recursive Features Elimination: ')
features_selection.select_features_recursive_feature_elimination(x, y)

print('\nFeatures Selection based on Extra trees classifier: ')
features_selection.select_features_extra_trees(x, y)

print('\nFeatures Selection based on Random Forest classifier: ')
features_selection.select_features_random_forest(x, y)
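
# features_selection is a project-local module that is not shown in this
# snippet. As a rough sketch of what select_features_k_best might look like,
# it can be built on scikit-learn's SelectKBest; the chi-squared scoring
# function and k=10 are illustrative assumptions, not the project's choices.
from sklearn.feature_selection import SelectKBest, chi2

def select_features_k_best(x, y, k=10):
    # chi2 expects non-negative features, which fits occurrence counts
    selector = SelectKBest(score_func=chi2, k=k)
    selector.fit(x, y)
    # report the k highest-scoring features with their chi-squared scores
    for idx in selector.get_support(indices=True):
        print(f'feature {idx}: score = {selector.scores_[idx]:.2f}')
    return selector.transform(x)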

# Split data into training and testing sets of 80% - 20%
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
Example #3
import common
import read_dataset

from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Default params
batch_size = 128
epochs = 100
training_dir = "training"
checkpoint_format = "weights.{epoch:04d}-{val_loss:.2f}.h5"
period = 5

# Create training dir
common.create_dir_if_not_exists(training_dir)

# Read data
X_train, y_train = read_dataset.read_data('train_new/')
print(f'X_train.shape = {X_train.shape}, y_train.shape = {y_train.shape}')

# Create model
model = common.create_baseline_model()  # Baseline model
# model = common.create_lenet5_model() # LeNet5
# model = common.create_alexnet_model() # AlexNet
# model = common.create_vgg_model()
# model = common.create_cnn_model()

# Print the model summary
print("=" * 80)
model.summary()
input("Press Enter to continue...")
print("=" * 80)
Example #4

from read_dataset import read_data, read_labels, convert_to_ascii
import os
import numpy as np

path_list = ["/Train/"]  # ,"/Test/","/Validate/"]
label_list = ["Train_labels.txt"]  # ,"Test_labels.txt","Validate_labels.txt"]

# read each split's samples and stack them, transposed, sorted by key
for path in path_list:
    x = read_data(os.getcwd() + path)
    x = np.asarray([i[1].T for i in sorted(x.items())])

# read each split's labels and encode each one as an array of ASCII codes
for label in label_list:
    y = read_labels(label)
    y = np.array([np.array(convert_to_ascii(i)) for i in y.values()])

np.save("labels.npy", y)

np.save("train_data.npy", x)
Example #5
import os
import sys
import numpy as np

import common
import read_dataset

# Read data
X_test, y_test = read_dataset.read_data('test_new/')
print(f'X_test.shape = {X_test.shape}, y_test.shape = {y_test.shape}')

# Create model
model = common.create_baseline_model()  # Baseline model
# model = common.create_lenet5_model() # LeNet5
# model = common.create_alexnet_model() # AlexNet model
# model = common.create_vgg_model() # VGG model
# model = common.create_cnn_model()

# Load weights
try:
    checkpoint_filepath = sys.argv[1]
    model.load_weights(checkpoint_filepath)
except IndexError:
    print("Usage: " + os.path.basename(__file__) + " <checkpoint_filepath>")
    sys.exit(1)

# Evaluate
print("=" * 80)
print('Evaluate on test data')
results = model.evaluate(X_test, y_test)
print(f'Test loss = {results[0]:.2f}')
print(f'Test acc = {results[1]*100:.2f}%')
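
# A possible follow-up check, assuming a softmax classification head and
# one-hot labels (neither is confirmed by the snippet): compare predicted
# and actual classes for the first few test samples.
probs = model.predict(X_test[:5])
print('Predicted:', np.argmax(probs, axis=1))
print('Actual:   ', np.argmax(y_test[:5], axis=1))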