Example #1
def test_reader():
    from sys import argv, exit
    from os import fdopen, remove
    import logging
    import tempfile
    import traceback

    if len(argv) < 4:
        print(argv[0], '<input_file>', '<memory_file>', '<bin_dir>')
        exit(0)

    logger.log_start(logging.DEBUG)

    try:
        ofile = tempfile.mkstemp(text=True)
        ofilefd = fdopen(ofile[0], 'w')

        mem_map = read_memory_file(argv[2])

        for entry in disasm_pt_file(argv[1], argv[3], mem_map):
            if entry is None:
                break
            ofilefd.write(str(entry) + "\n")

        ofilefd.close()
    except Exception:
        traceback.print_exc()
        ofilefd.close()
        remove(ofile[1])
        logger.log_stop()
        exit(1)

    logger.log_info(module_name, 'Wrote generated tuples to ' + str(ofile[1]))
    logger.log_stop()
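
A note on the pattern above: tempfile.mkstemp returns a raw OS-level file descriptor and a path rather than a file object, so the descriptor has to be wrapped with os.fdopen and the path removed by hand on failure. A minimal, self-contained sketch of that same pattern:

import os
import tempfile

fd, path = tempfile.mkstemp(text=True)
try:
    with os.fdopen(fd, 'w') as ofile:  # wrap the raw descriptor for buffered writes
        ofile.write('example line\n')
except Exception:
    os.remove(path)  # mkstemp files are not auto-deleted, so clean up on failure
    raise
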
Example #2
def test_generator():
    from sys import argv, exit
    import logging
    import os
    import queue
    import tempfile
    import traceback

    import reader

    if len(argv) < 5:
        print(argv[0], '<input_file>', '<bin_dir>', '<memory_file>',
              '<seq_len>')
        exit(0)

    logger.log_start(logging.DEBUG)

    try:
        ofile = tempfile.mkstemp(text=True)
        ofilefd = os.fdopen(ofile[0], 'w')

        filters.set_filters(['ret'])
        memory = reader.read_memory_file(argv[3])

    in_q, out_q = start_generator(2,
                                  reader.disasm_pt_file,
                                  seq_len=int(argv[4], 10))
    in_q.put((None, argv[1], argv[2], memory))
        while True:
            try:
                res = out_q.get(True, 5)
            except queue.Empty:
                count = get_in_service()
                if count == 0:
                    break
                else:
                    logger.log_debug(
                        module_name,
                        str(count) + ' workers still working on jobs')
                    continue
            ofilefd.write(str(res[0]) + ": " + str(res[1]) + "\n")

        stop_generator(10)
        ofilefd.close()
    except Exception:
        traceback.print_exc()
        ofilefd.close()
        os.remove(ofile[1])
        logger.log_stop()
        exit(1)

    logger.log_info(module_name, 'Wrote generated tuples to ' + str(ofile[1]))
    logger.log_stop()
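
The polling loop above (block on the output queue, and treat a queue.Empty timeout as a cue to check whether workers are still busy) is independent of the project's start_generator API. A minimal sketch of the same pattern using only the standard library:

import multiprocessing
import queue

def worker(in_q, out_q):
    for job in iter(in_q.get, None):  # None is the shutdown sentinel
        out_q.put(job * 2)

if __name__ == '__main__':
    in_q, out_q = multiprocessing.Queue(), multiprocessing.Queue()
    proc = multiprocessing.Process(target=worker, args=(in_q, out_q))
    proc.start()
    for job in (1, 2, 3):
        in_q.put(job)
    in_q.put(None)
    results = []
    while len(results) < 3:
        try:
            results.append(out_q.get(True, 5))  # block up to 5 seconds
        except queue.Empty:
            if not proc.is_alive():
                break  # worker exited; no more results are coming
    proc.join()
    print(results)  # [2, 4, 6]
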
Example #3
    options, args = parser.parse_args()

    if len(args) < 1:
        parser.print_help()
        sys.exit(0)

    # Keras likes to print $@!& to stdout, so don't import it until after the input parameters have been validated
    from keras.models import Model, Sequential, model_from_json
    from keras.layers import Dense, LSTM, Embedding, Activation, Dropout
    from keras import optimizers

    root_dir = args[0]

    # Initialization
    logger.log_start(options.log_level)

    # Input validation
    errors = False
    if options.threads < 1:
        logger.log_error(MODULE_NAME, 'Parsing requires at least 1 thread')
        errors = True

    if options.seq_len < 2:
        logger.log_error(MODULE_NAME, 'Sequence length must be at least 2')
        errors = True

    if options.batch_size < 1:
        logger.log_error(MODULE_NAME, 'Batch size must be at least 1')
        errors = True
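
The comment about Keras above illustrates a general pattern: defer expensive imports until after cheap input validation, so a bad invocation fails fast and quietly. A minimal sketch (numpy stands in here for any heavyweight dependency):

import sys

def main(argv):
    if len(argv) < 2:
        print('usage: prog <root_dir>')
        return 1
    # Heavy import deferred until the arguments look sane
    import numpy as np
    print(np.zeros(3))
    return 0

if __name__ == '__main__':
    sys.exit(main(sys.argv))
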
Example #4
# Bix 8/28/18
# Main

try:
    TESTING = False
    TANKS_ENABLED = False

    import emr3, logger, time, raspi, tanks

    if TANKS_ENABLED:
        import RPi.GPIO as GPIO

    if not TESTING:
        time.sleep(5)

    logger.log_start()

    CODE_VERSION = '1.0.0'
    rubus = raspi.Raspi()
    emr = emr3.EMR3()
    DELAY_TIME = 1
    no_connection_counter = 0
    hello_sent = False
    trans_counter = 3  # default to 3 so all new trans are retrieved on startup
    loop_counter = 1

    if TANKS_ENABLED:
        tanks = tanks.Tanks()

    while True:
        if not hello_sent:
Example #5
def main():
    global edges, max_seq

    # Parse input arguments
    parser = OptionParser(
        usage='Usage: %prog [options] pt_trace_dir output_file')
    parser.add_option('-r',
                      '--parse-ret',
                      action='store_true',
                      dest='parse_ret',
                      help='Consider returns')
    parser.add_option('-c',
                      '--parse-icall',
                      action='store_true',
                      dest='parse_icall',
                      help='Consider indirect calls')
    parser.add_option('-j',
                      '--parse-ijmp',
                      action='store_true',
                      dest='parse_ijmp',
                      help='Consider indirect jumps')
    parser.add_option('-s',
                      '--sequence-length',
                      action='store',
                      dest='max_seq',
                      type='int',
                      default=32,
                      help='Max sequence length to calculate (default: 32)')

    options, args = parser.parse_args()

    if len(args) != 2:
        parser.print_help()
        sys.stdout.write("\n  Note: Only preprocessed traces are supported\n")
        sys.exit(1)

    trace_filepath = os.path.join(args[0], 'trace_parsed.gz')
    opath = args[1]
    max_seq = options.max_seq

    # Input validation
    if not os.path.isfile(trace_filepath):
        sys.stderr.write('Error: ' + str(trace_filepath) +
                         " either does not exist or is not a file\n")
        sys.exit(1)

    if options.parse_ret:
        filters.add_filter('ret')

    if options.parse_icall:
        filters.add_filter('icall')

    if options.parse_ijmp:
        filters.add_filter('ijmp')

    if filters.get_num_enabled() == 0:
        sys.stderr.write(
            "Error: Must specify at least one thing to learn (-r, -c, -j)\n")
        sys.exit(1)

    # Initialization
    logger.log_start(20)
    history = list()  # History of past basic blocks

    # edges is a three-level dictionary: the keys of the first layer are sequence lengths,
    # the keys of the second layer are source BBID(s), and the keys of the third layer are
    # destination BBIDs. The value is a count (i.e., how many times that (src, dst) pair has occurred).
    edges = dict()
    for seq_len in range(1, max_seq + 1):
        edges[seq_len] = dict()

    # Parsing
    logger.log_info(module_name, 'Parsing trace')
    for entry in reader.read_preprocessed(trace_filepath):
        if entry is None:
            break  # End of trace

        src_bbid, dst_bbid, instr = entry[:3]

        # Update history
        history.append(src_bbid)
        if len(history) > max_seq:
            history.pop(0)

        if not any(func(entry) for func in filters.enabled_filters):
            continue

        for seq_len in range(1, min(len(history), max_seq) + 1):
            insert(history[-seq_len:], dst_bbid)

    # Distribution of how many possible destinations sources have, up to df_max destinations.
    logger.log_info(module_name, 'Calculating distributions')
    df_max = 100
    df = np.zeros((max_seq, df_max), dtype=int)

    for seq_len in range(1, max_seq + 1):
        for src_bbid in edges[seq_len]:
            dst_size = len(edges[seq_len][src_bbid].keys())
            if dst_size <= df_max:
                df[seq_len - 1][dst_size - 1] += 1
            else:
                df[seq_len - 1][df_max - 1] += 1

    # Save statistics
    logger.log_info(module_name, 'Saving statistics to ' + str(opath))
    with open(opath, 'w') as ofile:
        # Header
        ofile.write('seq_len,' +
                    ','.join([str(x) for x in range(1, df_max + 1)]) + "\n")
        # Data
        for seq_len in range(1, max_seq + 1):
            ofile.write(
                str(seq_len) + ',' +
                ','.join([str(x) for x in df[seq_len - 1]]) + "\n")

    # Cleanup
    logger.log_stop()
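
The insert() helper called in the parsing loop is not shown here. A hypothetical implementation consistent with the three-level edges layout described in the comments (using a tuple of BBIDs as the source key, which is an assumption) might look like:

def insert(src_seq, dst_bbid):
    """Hypothetical sketch: count an edge from a source sequence to dst_bbid."""
    global edges
    seq_len = len(src_seq)
    src_key = tuple(src_seq)  # lists are unhashable, so freeze the sequence
    layer = edges[seq_len].setdefault(src_key, dict())
    layer[dst_bbid] = layer.get(dst_bbid, 0) + 1
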
Example #6
def main():
    """Main"""
    global threshold

    parser = OptionParser(usage='Usage: %prog [options] eval_dir', version='Barnum Classifier ' + module_version)
    parser.add_option('-f', '--force', action='store_true',
                      help='Force threshold to produce no false positives (benign classified as malicious)')
    parser.add_option('-s', '--save', action='store', type='str', default=None,
                      help='Save classifier to given filepath (default: no saving)')
    parser.add_option('-l', '--load', action='store', type='str', default=None,
                      help='Use a previously saved classifier instead of making a new one')
    parser.add_option('-c', '--csv', action='store', type='str', default=None,
                      help='Save CSV of results to given filepath (default: no CSV)')
    parser.add_option('-p', '--plot', action='store', type='str', default=None,
                      help='Save plot as a PNG image to the given filepath (default: no plotting)')
    parser.add_option('-r', '--roc', action='store', type='str', default=None,
                      help='Save CSV plotting ROC curve to filepath (default: not saved)')
    parser.add_option('-w', '--workers', action='store', dest='workers', type='int', default=cpu_count(),
                      help='Number of workers to use (default: number of cores)')
    parser.add_option('-i', '--ignore-cache', action='store_true',
                      help='Do not use caching')

    options, args = parser.parse_args()

    if len(args) != 1 or options.workers < 1:
        parser.print_help()
        sys.exit(ERROR_INVALID_ARG)

    logger.log_start(20)
    logger.log_info(module_name, 'Barnum Classifier ' + module_version)

    idirpath = args[0]

    if not os.path.isdir(idirpath):
        logger.log_error(module_name, 'ERROR: ' + idirpath + " is not a directory")
        logger.log_stop()
        sys.exit(ERROR_INVALID_ARG)

    files = [os.path.join(idirpath, f) for f in os.listdir(idirpath) if os.path.isfile(os.path.join(idirpath, f))]
    num_benign = len([fp for fp in files if 'benign' in os.path.basename(fp)])
    num_malicious = len([fp for fp in files if 'malicious' in os.path.basename(fp)])

    if options.load is None and (num_benign == 0 or num_malicious == 0):
        logger.log_error(module_name, "Need at least 1 malicious and 1 benign sample to train a classifier")
        logger.log_stop()
        sys.exit(ERROR_INVALID_ARG)

    if options.roc is not None and (num_benign == 0 or num_malicious == 0):
        logger.log_error(module_name, "Need at least 1 malicious and 1 benign sample to plot a ROC curve")
        logger.log_stop()
        sys.exit(ERROR_INVALID_ARG)

    if not options.ignore_cache:
        init_cache()

    # Calculate average accuracy and confidence for each sample
    logger.log_info(module_name, "Parsing " + idirpath)
    pool = Pool(options.workers)
    data = [sample for sample in pool.map(parse_file, zip(files, [options] * len(files))) if sample[0] < 2]
    pool.close()
    ys = np.array([sample[0] for sample in data])
    xs = np.array([sample[1:3] for sample in data])

    if options.load is None:
        logger.log_info(module_name, "Creating classifier")
        # Train a new classifier from scratch
        if options.force:
            # Use ADASYN to oversample the benign class until FP falls to 0
            warnings.filterwarnings("ignore", module="imblearn")
            fp = 1.0
            ben_cnt = len([y for y in ys if y == 0])
            mal_cnt = len(ys) - ben_cnt
            ben_step = max(1, int(ben_cnt * 0.1))

            while fp > 0.0:
                ben_cnt += ben_step
                try:
                    xs_os, ys_os = ADASYN({0: ben_cnt, 1: mal_cnt}, n_jobs=options.workers).fit_resample(xs, ys)
                except ValueError:
                    continue  # Happens if change in counts produces too little change in ratio

                svm = SVC(kernel='linear')
                svm.fit(xs_os, ys_os)

                results = [[sample, svm.predict([sample[1:3]])] for sample in data]
                benign = [sample for sample in results if sample[0][0] == 0]
                fps = [sample for sample in results if sample[0][0] == 0 and sample[1] == 1]
                fp = float(len(fps)) / float(len(benign))
        else:
            svm = SVC(kernel='linear')
            svm.fit(xs, ys)
    else:
        # Use a previously saved classifier
        logger.log_info(module_name, "Loading classifier from " + options.load)
        try:
            svm = joblib.load(options.load)
            nu = None
        except Exception as ex:
            logger.log_error(module_name, "Failed to load classifier: " + str(ex))
            logger.log_stop()
            sys.exit(ERROR_RUNTIME)

    # Metrics
    results = [[sample, svm.predict([sample[1:3]])] for sample in data]
    benign = [sample for sample in results if sample[0][0] == 0]
    malicious = [sample for sample in results if sample[0][0] == 1]
    fps = [sample for sample in results if sample[0][0] == 0 and sample[1] == 1]
    fns = [sample for sample in results if sample[0][0] == 1 and sample[1] == 0]

    if len(benign) > 0:
        fp = float(len(fps)) / float(len(benign))
    else:
        fp = 'N/A'
    if len(malicious) > 0:
        fn = float(len(fns)) / float(len(malicious))
    else:
        fn = 'N/A'

    logger.log_info(module_name, "----------")
    logger.log_info(module_name, "FP: " + str(fp))
    logger.log_info(module_name, "FN: " + str(fn))
    logger.log_info(module_name, "----------")

    # Saving CSV
    if options.csv is not None:
        logger.log_info(module_name, "Saving CSV to " + options.csv)
        try:
            with open(options.csv, 'w') as csv_file:
                csv_file.write("true_label,pred_label,avg_accuracy,avg_confidence,name\n")
                for result in results:
                    csv_file.write(','.join([str(result[0][0]), str(result[1][0]), str(result[0][1]), str(result[0][2]), result[0][3]]) + "\n")
        except Exception as ex:
            logger.log_error(module_name, "Failed to save CSV: " + str(ex))

    # Saving Classifier
    if options.save is not None:
        logger.log_info(module_name, "Saving classifier to " + options.save)
        try:
            joblib.dump(svm, options.save)
        except Exception:
            logger.log_error(module_name, "Failed to save classifier to " + options.save)

    # Plotting
    if options.plot is not None:
        logger.log_info(module_name, "Saving plot to " + options.plot)
        axes = plt.gca()
        axes.set_xlim([0, 1])
        axes.set_ylim([0, 1])
        w = svm.coef_[0]
        a = -w[0] / w[1]
        xx = np.linspace(0, 1)
        yy = a * xx - (svm.intercept_[0]) / w[1]
        plt.scatter([sample[0][1] for sample in benign], [sample[0][2] for sample in benign], marker='o', c='blue', s=20)
        plt.scatter([sample[0][1] for sample in malicious], [sample[0][2] for sample in malicious], marker='x', c='red', s=20)
        plt.plot(xx, yy, 'k--')
        plt.xlabel('Wrong Prediction (%)')
        plt.ylabel('Average Confidence (%)')
        try:
            plt.savefig(options.plot)
        except Exception:
            logger.log_error(module_name, "Failed to save plot")

    # ROC
    if options.roc is not None:
        logger.log_info(module_name, "Saving ROC to " + options.roc)
        make_roc(options.roc, data, svm)

    logger.log_stop()
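
The plotting branch draws the linear SVM's decision boundary: the classifier separates on w[0]*x + w[1]*y + b = 0, so solving for y gives y = -(w[0]/w[1])*x - b/w[1], which is exactly the 'k--' line above. A minimal standalone sketch with toy data:

import numpy as np
from sklearn.svm import SVC

X = np.array([[0.1, 0.2], [0.2, 0.1], [0.8, 0.9], [0.9, 0.8]])
y = np.array([0, 0, 1, 1])
svm = SVC(kernel='linear').fit(X, y)
w, b = svm.coef_[0], svm.intercept_[0]
xx = np.linspace(0, 1)
yy = -(w[0] / w[1]) * xx - b / w[1]  # points where w . (x, y) + b == 0
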
Example #7
def main():

    #Create config builder values
    #Game Parameters
    HEIGHT = config_values.HEIGHT
    WIDTH = config_values.WIDTH
    WALL_DENSITY = config_values.WALL_DENSITY
    PILL_DENSITY = config_values.PILL_DENSITY
    FRUIT_CHANCE = config_values.FRUIT_CHANCE
    FRUIT_SCORE = config_values.FRUIT_SCORE
    TIME_MULT = config_values.TIME_MULT

    #Parameters
    RUNS = config_values.RUNS
    EVALS = config_values.EVALS

    #EA Parameters
    MAX_DEPTH = config_values.MAX_DEPTH
    PAC_POP_SIZE = config_values.pac_population_size
    GHOST_POP_SIZE = config_values.ghost_population_size
    PAC_GEN_STEP = config_values.pac_generation_step
    GHOST_GEN_STEP = config_values.ghost_generation_step
    P_SELECT = config_values.parent_selection
    OVER_S = config_values.over_sel
    S_SELECT = config_values.survival_selection
    T_SELECT = config_values.termination
    PAC_SUR_K = config_values.pac_survival_k
    GHOST_SUR_K = config_values.ghost_survival_k
    PAC_P_COEFF = config_values.pac_parsimony
    GHOST_P_COEFF = config_values.ghost_parsimony
    TERM_EVALS = config_values.term_evals
    CONVERGE = config_values.convergence
    MUT_RATE = config_values.mutation_rate
    P_UPPER = config_values.p_upper

    #Paths
    LOG = config_values.LOG
    GAME = config_values.BEST_GAME
    PAC_CONTROLLER = config_values.PAC
    GHOST_CONTROLLER = config_values.GHOST


    best_pac_fitness_all_runs = -1
    best_ghost_fitness_all_runs = 1

    #Starting logging
    logger.log_start(config_values)

    #Create the Game dictionary. Add 2 to compensate for border.
    game_dict = {
                "height" : HEIGHT+2,
                "width" : WIDTH+2,
                "wall_density" : WALL_DENSITY,
                "pill_density" : PILL_DENSITY,
                "fruit_chance" : FRUIT_CHANCE,
                "fruit_score" : FRUIT_SCORE,
                "time_mult" : TIME_MULT,
                }

    for i in range(RUNS):
        #Starting this Run
        print("Starting run {}".format(i+1))
        #Insert new log block...
        logger.log_new_run(LOG, i)

        #Create the EA instance
        EA = Evolution(PAC_POP_SIZE, GHOST_POP_SIZE, PAC_GEN_STEP, GHOST_GEN_STEP, P_SELECT, S_SELECT, T_SELECT, PAC_SUR_K, GHOST_SUR_K, PAC_P_COEFF, GHOST_P_COEFF, OVER_S, TERM_EVALS, CONVERGE, MUT_RATE, game_dict, MAX_DEPTH, P_UPPER)

        best_pac_this_run = EA.get_best_fitness()
        best_ghost_this_run = EA.get_best_ghost_fitness()

        #Better fitnesses may have emerged in this run's initial population
        if best_pac_this_run > best_pac_fitness_all_runs:
            #print the game and assign
            print_game(EA.best_world_string())
            #Game controller
            EA.get_best_member().print_controller(PAC_CONTROLLER)
            best_pac_fitness_all_runs = best_pac_this_run

        #Now for ghost
        if best_ghost_this_run < best_ghost_fitness_all_runs:
            #Just print controller and assign
            worst_game(EA.worst_world_string())
            EA.get_best_ghost().print_controller(GHOST_CONTROLLER)
            best_ghost_fitness_all_runs = best_ghost_this_run

        #Start this run's log
        logger.log_new_entry(LOG, max(PAC_POP_SIZE, GHOST_POP_SIZE), best_pac_this_run, EA.get_average_fitness())

        #Since a fitness evaluation is now defined as a game being played, when creating generations the number of
        #games played is max(pacman_lambda, ghost_lambda)
        for j in range((max(PAC_POP_SIZE, GHOST_POP_SIZE)+max(PAC_GEN_STEP, GHOST_GEN_STEP)), EVALS+1, max(PAC_GEN_STEP, GHOST_GEN_STEP)):
            #Main evolution loop

            #Create the next generation
            EA.create_generation()

            #Dump pools into their population
            EA.pac_dump_pool()
            EA.ghost_dump_pool()

            #Do the survival selection for both populations
            EA.do_pac_survival_selection()
            EA.do_ghost_survival_selection()

            #Log entry
            best_pac_this_run = EA.get_best_fitness()
            best_ghost_this_run = EA.get_best_ghost_fitness()

            #Check to see if any better controllers have emerged from the next generation
            #log entry
            logger.log_new_entry(LOG, j, best_pac_this_run, EA.get_average_fitness())

            if best_pac_this_run > best_pac_fitness_all_runs:
                #print the game and assign
                print_game(EA.best_world_string())
                #Game controller
                EA.get_best_member().print_controller(PAC_CONTROLLER)
                best_pac_fitness_all_runs = best_pac_this_run

            if best_ghost_this_run <= best_ghost_fitness_all_runs:
                #Just print controller and assign
                #Print the worst game for testing
                worst_game(EA.worst_world_string())
                EA.get_best_ghost().print_controller(GHOST_CONTROLLER)
                best_ghost_fitness_all_runs = best_ghost_this_run

            if EA.determine_termination():
                break
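
The loop bounds above encode the comment's accounting: the initial populations cost max(mu_pac, mu_ghost) games, and each subsequent generation costs max(lambda_pac, lambda_ghost). A worked sketch with hypothetical parameter values:

PAC_POP_SIZE, GHOST_POP_SIZE = 20, 10  # mu values (assumed for illustration)
PAC_GEN_STEP, GHOST_GEN_STEP = 5, 5    # lambda values (assumed)
EVALS = 50                             # total game budget (assumed)

start = max(PAC_POP_SIZE, GHOST_POP_SIZE) + max(PAC_GEN_STEP, GHOST_GEN_STEP)
step = max(PAC_GEN_STEP, GHOST_GEN_STEP)
print(list(range(start, EVALS + 1, step)))  # [25, 30, 35, 40, 45, 50]
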
Example #8
def main():
    # Parse input arguments
    parser = OptionParser(
        usage='Usage: %prog [options] trace_directory bin_directory')
    parser.add_option(
        '-f',
        '--force',
        action='store_true',
        help='If a complete or partial output already exists, overwrite it.')
    parser.add_option(
        '-t',
        '--timeout',
        action='store',
        type='int',
        default=None,
        help='Max seconds to run before quitting (default: infinite).')
    parser.add_option(
        '-p',
        '--no-partial',
        action='store_true',
        help='If timeout is reached, do not save the partially parsed trace.')
    options, args = parser.parse_args()

    if len(args) < 2:
        parser.print_help()
        sys.exit(0)

    data_dir = args[0]
    bin_dir = args[1]

    logger.log_start(logging.INFO)

    # Input validation
    if not os.path.isdir(data_dir):
        logger.log_error(module_name, data_dir + ' is not a directory')
        logger.log_stop()
        sys.exit(1)

    if not os.path.isdir(bin_dir):
        logger.log_error(module_name, bin_dir + ' is not a directory')
        logger.log_stop()
        sys.exit(1)

    if options.timeout is None and options.no_partial:
        logger.log_warning(
            module_name, "Setting --no-partial without --timeout does nothing")

    # Make sure all the expected files are there
    mem_file = None
    trace_file = None

    files = os.listdir(data_dir)
    for fname in files:
        if fname in ('mapping.txt', 'mapping.txt.gz'):
            mem_file = os.path.join(data_dir, fname)
        elif fname in ('trace_0', 'trace_0.gz'):
            trace_file = os.path.join(data_dir, fname)

    if mem_file is None:
        logger.log_error(
            module_name,
            'Could not find mapping.txt or mapping.txt.gz in ' + data_dir)
        logger.log_stop()
        sys.exit(1)

    if trace_file is None:
        logger.log_error(module_name,
                         'Could not find trace_0 or trace_0.gz in ' + data_dir)
        logger.log_stop()
        sys.exit(1)

    # Parse the memory file
    mem_map = reader.read_memory_file(mem_file)
    if mem_map is None:
        logger.log_error(module_name, 'Failed to parse memory mapping file')
        logger.log_stop()
        sys.exit(1)

    # We're ready to parse the trace
    o_filepath = os.path.join(data_dir, 'trace_parsed.gz')

    if os.path.isfile(o_filepath) and not options.force:
        logger.log_error(module_name, 'Preprocess file already exists')
        logger.log_stop()
        sys.exit(1)

    if os.path.isfile(o_filepath + '.part') and not options.force:
        logger.log_error(module_name, 'Partial preprocess file already exists')
        logger.log_stop()
        sys.exit(1)

    entries = 0
    with gzip.open(o_filepath + '.part', 'wb') as ofile:
        for instr in reader.disasm_pt_file(trace_file, bin_dir, mem_map,
                                           options.timeout):
            if instr is None:
                break
            ofile.write(pack_instr(instr))
            entries += 1

    if reader.DISASM_TIMEOUT.is_set() and options.no_partial:
        logger.log_info(module_name, "Deleting partial trace")
        os.remove(o_filepath + '.part')
    elif entries > 0:
        os.rename(o_filepath + '.part', o_filepath)
    else:
        logger.log_error(module_name, 'No output produced, empty file')
        os.remove(o_filepath + '.part')

    logger.log_stop()
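
The '.part' suffix handling above is a write-then-rename pattern: stream output to a temporary name and promote it to the final path only after writing succeeds, so an interrupted run never leaves a truncated file under the real name. A minimal sketch:

import gzip
import os

o_filepath = 'trace_parsed.gz'  # hypothetical output path
with gzip.open(o_filepath + '.part', 'wb') as ofile:
    ofile.write(b'packed records go here')
os.rename(o_filepath + '.part', o_filepath)  # promote only after a clean write
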
Example #9
def main():
    lg.log_start()
    options = ["Create CSV File", "View files", "Graphing", "Modelling", "Calculate"]

    print("--ComPhys--")
    for i in range(len(options)):
        optNum = "[" + str(i + 1) + "]"
        print(optNum + " " + options[i])

    selection = int(input("Selection:")) - 1

    # Creating a CSV file by entering data points manually
    if selection == 0:
        lg.log("Displaying CSV file creation options")
        print("\n--Create CSV File--\n")
        newfilename = input("Please enter file name:")
        createdfile = fio.createfile(newfilename)

        print("Enter values as x,y pairs. Ex: 4,5")  # Instructions for the user
        print("Enter z when finished.")

        addvals = True
        while addvals:  # A while loop that takes data points from the user and writes them to the file
            newval = input("Enter data: ")

            if newval == "z":
                addvals = False
            else:
                createdfile.write(newval + "\n")

        createdfile.close()  # close so the data is flushed to disk
        print("File saved.")

        lg.log("New CSV file " + newfilename + " created and saved")

    # Viewing a file in the working directory
    elif selection == 1:
        lg.log("Displaying files in directory")
        print("\n--Files in Directory--\n")
        file_list = listdir('data')
        for i in range(len(file_list)):
            print(("[" + str(i + 1) + "]" + "\t" + file_list[i]))

        selection = int(input("Selection:")) - 1

        with open("data/" + file_list[selection]) as file:
            for line in file:
                print(line)

    # Graphing
    # TODO: Currently set up for testing, needs to be modified for general use
    elif selection == 2:
        lg.log("Displaying graphing options")
        print("\n--Graphing Options--\n")
        print("[1] Line Chart (Default)")

        selection = int(input("Selection:")) - 1

        xvals = []
        yvals = []

        with open("data/testdata.csv") as file:
            for line in file:
                vals = line.split(",")
                xvals.append(float(vals[0]))  # convert so the chart axes are numeric
                yvals.append(float(vals[1]))

        # if selection == 1:

        graphing.plot.defaultchart("TEST CHART", "TEST_X", "TEST_Y", xvals, yvals)

    # Physical Modelling
    elif selection == 3:
        lg.log("Displaying modelling options")
        print("\n--Modelling Options--\n")
        print("[1] Projectile Motion Trajectory (2D)")


    # Calculation Options
    elif selection == 4:
        lg.log("Displaying calculation options")
        print("\n--Calculation Options--\n")

    # Steps taken at the end of the program: printing a closing line and ending the log
    print("~~~~~~~~~~~~~End of Program~~~~~~~~~~~~~")
    lg.close()
Example #10
def main():
    """Main"""
    parser = OptionParser(usage='Usage: %prog [options] eval_dir',
                          version='Barnum Cluster ' + module_version)
    parser.add_option(
        '-c',
        '--csv',
        action='store',
        type='str',
        default=None,
        help='Save CSV of results to given filepath (default: no CSV)')
    parser.add_option(
        '-p',
        '--plot',
        action='store',
        type='str',
        default=None,
        help='Save plot as a PNG image to the given filepath (default: no plotting)')
    parser.add_option(
        '-w',
        '--workers',
        action='store',
        dest='workers',
        type='int',
        default=cpu_count(),
        help='Number of workers to use (default: number of cores)')
    parser.add_option('--max-classes',
                      action='store',
                      type='int',
                      default=256,
                      help='How many classes to use (default: 256)')
    parser.add_option(
        '--min-samples',
        action='store',
        type='int',
        default=4,
        help='Minimum samples to form a cluster in DBSCAN (default: 4)')
    parser.add_option('--eps',
                      action='store',
                      type='float',
                      default=0.03,
                      help='Epsilon parameter to DBSCAN (default: 0.03)')

    options, args = parser.parse_args()

    if len(args) != 1 or options.workers < 1:
        parser.print_help()
        sys.exit(ERROR_INVALID_ARG)

    logger.log_start(20)
    logger.log_info(module_name, 'Barnum Cluster %s' % module_version)

    idirpath = args[0]

    if not os.path.isdir(idirpath):
        logger.log_error(module_name,
                         'ERROR: %s is not a directory' % idirpath)
        logger.log_stop()
        sys.exit(ERROR_INVALID_ARG)

    files = [
        os.path.join(idirpath, f) for f in os.listdir(idirpath)
        if os.path.isfile(os.path.join(idirpath, f))
    ]
    # We only care about clustering malicious traces
    mal_files = [fp for fp in files if 'malicious' in os.path.basename(fp)]
    num_mal = len(mal_files)

    # Calculate clustering metrics
    logger.log_info(module_name, "Parsing " + idirpath)
    pool = Pool(options.workers)
    data = [
        sample for sample in pool.map(
            parse_file, zip(mal_files, [options.max_classes] * num_mal))
        if sample
    ]
    pool.close()
    xs = np.array([sample[0] for sample in data])
    ns = [sample[1] for sample in data]

    # Clustering
    logger.log_info(module_name, "Calculating clusters")
    db = DBSCAN(eps=options.eps, min_samples=options.min_samples).fit(xs)
    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True
    labels = db.labels_

    # Number of clusters in labels, ignoring noise if present.
    n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
    n_noise = list(labels).count(-1)
    logger.log_info(module_name, '      Number of points: %d' % len(ns))
    logger.log_info(module_name, '    Number of clusters: %d' % n_clusters)
    logger.log_info(module_name, 'Number of noise points: %d' % n_noise)

    # Saving results as CSV
    if options.csv is not None:
        logger.log_info(module_name, "Saving CSV to %s" % options.csv)
        try:
            with open(options.csv, 'w') as csv_file:
                csv_file.write("cluster,filename\n")
                for label, name in zip(labels, ns):
                    csv_file.write(','.join([str(label), name]) + "\n")
        except Exception as ex:
            logger.log_error(module_name, "Failed to save CSV: %s" % str(ex))

    # Saving results as plot image
    if options.plot is not None:
        logger.log_info(module_name, "Generating plot")
        theta = radar_factory(options.max_classes, frame='polygon')
        fig, axes = plt.subplots(subplot_kw=dict(projection='radar'))
        colors = ['b', 'r', 'g', 'm', 'y']
        axes.set_varlabels([""])  # no varlabels, they aren't that meaningful
        axes.set_rgrids([0.2, 0.4, 0.6, 0.8])
        legend_labels = list()
        for label_key in set(labels):
            if label_key == -1:
                continue  # noise
            legend_labels.append(label_key)
            label_color = colors[label_key % len(colors)]
            # Calculate per-cluster average
            label_mask = (labels == label_key)
            label_points = xs[label_mask & core_samples_mask]
            label_means = np.mean(label_points, axis=0)
            axes.plot(theta, label_means, color=label_color)
            axes.fill(theta, label_means, facecolor=label_color, alpha=0.25)
        # Legend
        legend = axes.legend(legend_labels,
                             loc=(0.9, .95),
                             labelspacing=0.1,
                             fontsize='small')

        try:
            plt.savefig(options.plot)
        except Exception:
            logger.log_error(module_name, "Failed to save plot")

    logger.log_stop()
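
As a reminder of the label bookkeeping in the clustering step: DBSCAN assigns noise points the label -1, which is why the snippet subtracts one from the distinct-label count when -1 is present. A tiny sketch:

labels = [0, 0, 1, 1, -1, 2, -1]  # hypothetical DBSCAN output
n_clusters = len(set(labels)) - (1 if -1 in labels else 0)  # -> 3
n_noise = labels.count(-1)                                  # -> 2
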