def create_random_sets():
    """Generate fresh random training and testing inputs.

    Reads the module-level ``set_size``, ``testing_set_size``,
    ``malware_ratio`` and ``total_features`` values, builds one set for
    training and one for testing with ``onehot.generate_set``, and encodes
    each of them with ``onehot.generate_input``.

    Returns:
        A 4-tuple ``(data, labels, test_data, test_labels)``.
    """
    # Training side: draw the set first, then encode it.
    print("Generating TRAINING set...")
    train_apps = onehot.generate_set(set_size, malware_ratio)
    print("Generating TRAINING input...")
    train_data, train_labels = onehot.generate_input(train_apps, total_features)

    # Testing side: same two steps, sized by testing_set_size.
    print("Generating TESTING set...")
    test_apps = onehot.generate_set(testing_set_size, malware_ratio)
    print("Generating TESTING input...")
    test_data, test_labels = onehot.generate_input(test_apps, total_features)

    return train_data, train_labels, test_data, test_labels
def _ensure_set_file(path, set_size, malware_ratio, progress_message):
    """Write a newly generated set to *path* unless that file already exists."""
    if os.path.isfile(path):
        return
    print("Creating data-labels...")
    print(progress_message)
    generated = onehot.generate_set(set_size, malware_ratio)
    with open(path, "w") as file:
        file.writelines(str(item) + "\n" for item in generated)


def _read_set_file(path):
    """Return the lines of *path* as a list, each without its line terminator."""
    with open(path, "r") as file:
        # rstrip("\n") only removes the terminator, so a final line that has
        # no trailing newline keeps its last character.
        return [line.rstrip("\n") for line in file]


def create_sets():
    """Load (creating them on first use) the persisted 8500-item sets.

    For each of ``training_set_8500.txt`` and ``testing_set_8500.txt``: if
    the file does not exist, a set of 8500 items with malware ratio 0.3 is
    generated via ``onehot.generate_set`` and written one item per line.
    Both files are then read back and encoded with
    ``onehot.generate_input`` using the module-level ``total_features``.

    The generated sets are written to the same paths that are checked and
    read, so a first run in an empty directory succeeds.

    Returns:
        A 4-tuple ``(data, labels, test_data, test_labels)``.
    """
    training_path = "training_set_8500.txt"
    testing_path = "testing_set_8500.txt"
    set_size = 8500
    malware_ratio = 0.3

    _ensure_set_file(
        training_path, set_size, malware_ratio, "Generating TRAINING set...")
    _ensure_set_file(
        testing_path, set_size, malware_ratio, "Generating TESTING set...")

    training_set = _read_set_file(training_path)
    testing_set = _read_set_file(testing_path)

    print("Generating TRAINING input...")
    data, labels = onehot.generate_input(
        training_set, total_features)  # perform one-hot encoding
    print("Generating TESTING input...")
    test_data, test_labels = onehot.generate_input(
        testing_set, total_features)  # perform one-hot encoding
    return data, labels, test_data, test_labels
Ejemplo n.º 3
0
def create_random_sets(set_size=1500, malware_ratio=0.3):
    """Generate one random set and return its encoded data and labels.

    Args:
        set_size: Size passed to ``onehot.generate_set``.
        malware_ratio: Malware ratio passed to ``onehot.generate_set``.

    Returns:
        A 2-tuple ``(test_data, test_labels)`` as produced by
        ``onehot.generate_input`` with the module-level ``total_features``.
    """
    print("Generating set...")
    random_apps = onehot.generate_set(set_size, malware_ratio)

    print("Generating input...")
    # NOTE(review): the original comment says generate_input also shuffles
    # the set before one-hot encoding; confirm against the onehot module.
    encoded_data, encoded_labels = onehot.generate_input(
        random_apps, total_features)
    return encoded_data, encoded_labels
def create_set():
    """Load (creating it on first use) the persisted 200-item testing set.

    If ``testing_set_200.txt`` does not exist, a set of 200 items with
    malware ratio 0.5 is generated via ``onehot.generate_set`` and written
    one item per line. The file is then read back and encoded with
    ``onehot.generate_input`` using the module-level ``total_features``.

    Returns:
        A 2-tuple ``(test_data, test_labels)``.
    """
    set_path = "testing_set_200.txt"

    if not os.path.isfile(set_path):
        set_size = 200
        malware_ratio = 0.5
        print("Creating data-labels...")
        print("Generating TESTING set...")
        generated = onehot.generate_set(
            set_size, malware_ratio)  # generate random testing set
        with open(set_path, "w") as file:
            file.writelines(str(item) + "\n" for item in generated)

    with open(set_path, "r") as file:
        # rstrip("\n") only removes the terminator, so a final line that has
        # no trailing newline keeps its last character.
        testing_set = [line.rstrip("\n") for line in file]

    print("Generating TESTING input...")
    test_data, test_labels = onehot.generate_input(
        testing_set, total_features)  # perform one-hot encoding
    return test_data, test_labels
Ejemplo n.º 5
0

if __name__ == "__main__":
    total_features = 545333  # total unique features
    set_size = 2000  # set site that will be used to create random training set
    testing_set_size = 2000  # set site that will be used to create random test set
    malware_ratio = 0.3  # malware ratio in the set size

    onehot.create_list_of_apps()  # function from set_one_encoding.py

    # check if a predefined training
    if os.path.isfile("training_set_2000.txt") is False and os.path.isfile(
            "testing_set_2000.txt") is False:
        print("Creating data-labels...")
        print("Generating TRAINING set...")
        training_set = onehot.generate_set(
            set_size, malware_ratio)  # generate random training set
        with open("training_set_2000.txt", "w") as file:
            for item in training_set:
                file.write(str(item) + "\n")

        print("Generating TESTING set...")
        testing_set = onehot.generate_set(
            testing_set_size, malware_ratio)  # generate random testing set
        with open("testing_set_2000.txt", "w") as file:
            for item in testing_set:
                file.write(str(item) + "\n")

    training_set = []  # the list of training set
    testing_set = []  # the list of testing set

    with open(