Ejemplo n.º 1
0
def cmd_display_histo():
    """Show one probability histogram per class for the testing set.

    The model name and the selected dataset are read from the configuration.
    The module-level ``X_test`` / ``Y_test`` are reused when both are already
    set; otherwise they are loaded from the ``'testing_set'`` split and
    ``X_test`` is passed through ``sync_features`` before plotting.
    """
    global X_test, Y_test

    print_banner("Displaying histogram")

    model_type = config.get_str('model', 'Model')
    dataset_name = config.get_str('dataset', 'SelectedDataset')

    test_data_missing = X_test is None or Y_test is None
    if test_data_missing:
        X_test, Y_test = transform.get_xy(dataset_name, 'testing_set',
                                          language, vuln_type,
                                          selected_features)
        X_test = sync_features(X_test)

    # Index in this tuple doubles as the class label passed as ``cls``.
    title_formats = (
        "%s %s (class: not vulnerable)",
        "%s %s (class: vulnerable)",
    )

    for class_label, title_format in enumerate(title_formats):
        display_prob_histogram(title=title_format % (vuln_type, model_type),
                               model=model,
                               X=X_test,
                               Y=Y_test,
                               cls=class_label)
Ejemplo n.º 2
0
def main(args=None):
    """Parse the command list, initialize configuration and run the commands.

    Args:
        args: Argument list. When ``None``, ``sys.argv[1:]`` is used. The
            first element is a comma-separated list of command names; spaces
            inside it are removed before splitting.

    When no arguments are given the help text is printed and the function
    returns without running anything.
    """
    global language, vuln_type

    if args is None:
        args = sys.argv[1:]

    if not args:
        print_help()
        # Without a command list there is nothing to run; returning here
        # avoids an IndexError on args[0] if print_help() does not exit.
        return

    # Remove all spaces
    command_line = args[0].replace(" ", "")

    commands = command_line.split(",")

    # --- Initialize configuration --
    config.init()

    language = config.get_str('dataset', 'SelectedLanguage')
    vuln_type = config.get_str('dataset', 'SelectedVulnerabilityType')

    np.set_printoptions(precision=3, suppress=True)

    # --- Run commands ---
    run_commands(commands)

    # -- Final clean up for some models --
    # Only models exposing a clean_up attribute get the extra step.
    if hasattr(model, 'clean_up'):
        print_banner("Cleaning up")
        model.clean_up()
Ejemplo n.º 3
0
def cmd_store_outliers():
    """Store the outliers of the testing set for the current model.

    When ``analysis/UseCustomTestSet`` is enabled, the ``'Custom'`` dataset is
    built and transformed first and a threshold of 0.0 is used; otherwise the
    configured dataset is used with a threshold of 0.5.  Features are passed
    through ``sync_features`` before being handed to ``data.store_data``.
    """
    print_banner("Store outliers")

    use_custom = config.get_boolean('analysis', 'UseCustomTestSet')

    if use_custom:
        print_notice("Creating a custom test set")
        dataset_name = 'Custom'

        custom_sets = dataset_factory.get_dataset(dataset_name).get_sets()
        transform.transform_sets(dataset_name, custom_sets, language)
        # TODO Delete transforms and data set once the results are stored
        # (dataset_factory.get_dataset(...).delete_sets()).
    else:
        dataset_name = config.get_str('dataset', 'SelectedDataset')

    outlier_threshold = 0.0 if use_custom else 0.5

    # Both paths read the testing split of whichever dataset was chosen.
    orig, features, labels = transform.get_xy_with_orig(
        dataset_name, 'testing_set', language, vuln_type, selected_features)

    features = sync_features(features)

    data.store_data(model, orig, features, labels,
                    just_outliers=True, threshold=outlier_threshold)
Ejemplo n.º 4
0
def cmd_clean_custom():
    """Delete the sets and the transforms of the ``'Custom'`` dataset."""
    print_banner("Cleaning custom set")

    custom_name = 'Custom'
    custom_dataset = dataset_factory.get_dataset(custom_name)

    custom_dataset.delete_sets()
    transform.delete_transforms([custom_name])
Ejemplo n.º 5
0
def cmd_create_set():
    """Build the sets of the configured dataset into the global ``sets``."""
    global sets

    print_banner("Building sets")

    dataset_name = config.get_str('dataset', 'SelectedDataset')
    selected_dataset = dataset_factory.get_dataset(dataset_name)

    sets = selected_dataset.get_sets()
Ejemplo n.º 6
0
def cmd_create_features():
    """Create popular features and keep them in ``popular_features``.

    Uses the configured dataset name together with the module-level ``sets``
    and ``language``.
    """
    global popular_features

    print_banner("Creating features")

    dataset_name = config.get_str('dataset', 'SelectedDataset')

    popular_features = transform.create_popular_features(dataset_name, sets,
                                                         language)
Ejemplo n.º 7
0
def cmd_select_features():
    """Run feature selection on the training set.

    The result of ``train.select_features`` is stored in the module-level
    ``selected_features``.
    """
    global selected_features

    print_banner("Selecting features")

    dataset_name = config.get_str('dataset', 'SelectedDataset')

    train_x, train_y = transform.get_xy(dataset_name, 'training_set',
                                        language, vuln_type)

    selected_features = train.select_features(train_x, train_y)
Ejemplo n.º 8
0
def cmd_test_model():
    """Load the testing set, sync its features and print model metrics.

    The loaded data is stored in the module-level ``X_test`` / ``Y_test`` so
    later commands can reuse it.
    """
    global X_test, Y_test

    print_banner("Testing model")

    dataset_name = config.get_str('dataset', 'SelectedDataset')

    loaded_x, Y_test = transform.get_xy(dataset_name, 'testing_set', language,
                                        vuln_type, selected_features)
    X_test = sync_features(loaded_x)

    print_metrics(model=model, X=X_test, Y=Y_test)
Ejemplo n.º 9
0
def cmd_store_all():
    """Store results for the whole testing set (``just_outliers=False``)."""
    print_banner("Store all")

    dataset_name = config.get_str('dataset', 'SelectedDataset')

    orig, features, labels = transform.get_xy_with_orig(
        dataset_name, 'testing_set', language, vuln_type, selected_features)

    synced_features = sync_features(features)

    data.store_data(model, orig, synced_features, labels,
                    just_outliers=False)
Ejemplo n.º 10
0
def cmd_create_model():
    """Train a model on the training set and store it in ``model``.

    The training columns are sorted in place; if ``train_features`` has not
    been set yet it is initialized to those sorted columns.
    """
    global model, train_features

    print_banner("Creating model")

    dataset_name = config.get_str('dataset', 'SelectedDataset')

    train_x, train_y = transform.get_xy(dataset_name, 'training_set',
                                        language, vuln_type,
                                        selected_features)

    # Sort columns in place so the column order is fixed before it is
    # recorded in train_features.
    train_x.sort_index(axis=1, inplace=True)

    if train_features is None:
        train_features = train_x.columns

    model = train.select_model(language, vuln_type, train_x, train_y)
Ejemplo n.º 11
0
def cmd_store_custom():
    """Build the ``'Custom'`` dataset, transform it and store its results.

    Results are stored with ``just_outliers=True`` and a threshold of 0.0.
    """
    print_banner("Store custom test set results")
    print_notice("Creating a custom test set")

    custom_name = 'Custom'

    custom_sets = dataset_factory.get_dataset(custom_name).get_sets()
    transform.transform_sets(custom_name, custom_sets, language)

    orig, features, labels = transform.get_xy_with_orig(
        custom_name, 'testing_set', language, vuln_type, selected_features)

    synced_features = sync_features(features)

    data.store_data(model, orig, synced_features, labels,
                    just_outliers=True, threshold=0.0)
Ejemplo n.º 12
0
def cmd_display_model():
    """Display the PR curve of the current model on the testing set.

    The module-level ``X_test`` / ``Y_test`` are reused when both are set;
    otherwise they are loaded from the ``'testing_set'`` split and ``X_test``
    is passed through ``sync_features``.
    """
    global X_test, Y_test

    print_banner("Displaying model")

    model_type = config.get_str('model', 'Model')
    dataset_name = config.get_str('dataset', 'SelectedDataset')

    test_data_missing = X_test is None or Y_test is None
    if test_data_missing:
        X_test, Y_test = transform.get_xy(dataset_name, 'testing_set',
                                          language, vuln_type,
                                          selected_features)
        X_test = sync_features(X_test)

    curve_title = "%s %s" % (vuln_type, model_type)

    display_pr_curve(title=curve_title, model=model, X=X_test, Y=Y_test)
Ejemplo n.º 13
0
def cmd_tune_params():
    """Tune model parameters using the training and tuning sets.

    The training columns are sorted in place and, if ``train_features`` is
    still unset, recorded there.  The tuning features are passed through
    ``sync_features`` before ``train.select_best_model`` is called.
    """
    global train_features

    print_banner("Tuning model parameters")

    dataset_name = config.get_str('dataset', 'SelectedDataset')

    train_x, train_y = transform.get_xy(dataset_name, 'training_set',
                                        language, vuln_type,
                                        selected_features)
    train_x.sort_index(axis=1, inplace=True)

    if train_features is None:
        train_features = train_x.columns

    tuning_x, tuning_y = transform.get_xy(dataset_name, 'tuning_set',
                                          language, vuln_type,
                                          selected_features)
    tuning_x = sync_features(tuning_x)

    # NOTE(review): the return value of select_best_model is discarded, so
    # the global model is not updated here - confirm that this is intended.
    train.select_best_model(train_x, train_y, tuning_x, tuning_y)
Ejemplo n.º 14
0
def cmd_filter_features():
    """Drop features whose name starts with the configured prefix.

    The prefix comes from ``model/FeatureFilterStartString``.  If no features
    have been selected yet, all training-set columns are used as the starting
    point.  The remaining features are stored in ``selected_features`` and
    printed as a numbered list starting at 1.
    """
    global selected_features

    print_banner("Filtering features")

    excluded_prefix = config.get_str('model', 'FeatureFilterStartString')

    if selected_features is None:
        dataset_name = config.get_str('dataset', 'SelectedDataset')

        train_x, _ = transform.get_xy(dataset_name, 'training_set', language,
                                      vuln_type)
        selected_features = train_x.columns.values

    selected_features = [
        name for name in selected_features
        if not name.startswith(excluded_prefix)
    ]

    for position, name in enumerate(selected_features, start=1):
        print_notice("%d. %s" % (position, name))
Ejemplo n.º 15
0
def cmd_compare_tools():
    """Compare the model's results on the testing set with external tools.

    Steps:
      1. Make sure ``train_features`` is set (loaded from the sorted training
         columns when it is still ``None``).
      2. Find a threshold ``c`` on the tuning set via ``find_best_threshold``.
      3. Print the model's own results on the testing set at that threshold.
      4. For every ``(tool, file_name)`` entry in the ``tools`` configuration
         section, call ``compare_results`` against the same testing data.
    """
    global train_features

    print_banner("Comparing results")

    sel_ds = config.get_str('dataset', 'SelectedDataset')
    sel_vt = config.get_str('dataset', 'SelectedVulnerabilityType')

    if train_features is None:
        X, _ = transform.get_xy(sel_ds, 'training_set', language, vuln_type,
                                selected_features)
        X.sort_index(axis=1, inplace=True)

        train_features = X.columns

    orig_tuning, X_tuning, _ = transform.get_xy_with_orig(
        sel_ds, 'tuning_set', language, vuln_type, selected_features)

    X_tuning = sync_features(X_tuning)

    c = find_best_threshold(model, orig_tuning, X_tuning)

    print_notice("Preferred threshold (Y > c): %.2f" % c)

    orig, X, _ = transform.get_xy_with_orig(sel_ds, 'testing_set', language,
                                            vuln_type, selected_features)

    # Sync the testing features exactly like the tuning features above and
    # like every other command that feeds data to the model; previously the
    # testing set was passed on unsynced.
    X = sync_features(X)

    print_notice('-' * 55)
    print_notice("Our results")

    print_model_results(model, orig, X, c)

    for (tool, file_name) in config.get_items('tools'):
        print_notice('-' * 55)
        print_notice('Comparing against tool: %s' % tool)
        compare_results(file_name, orig, sel_vt)
Ejemplo n.º 16
0
def cmd_clean_set():
    """Delete the sets of the configured dataset."""
    print_banner("Cleaning sets")

    dataset_name = config.get_str('dataset', 'SelectedDataset')
    selected_dataset = dataset_factory.get_dataset(dataset_name)

    selected_dataset.delete_sets()
Ejemplo n.º 17
0
def cmd_clean_transform():
    """Delete transforms by calling ``transform.delete_transforms()``."""
    banner_text = "Cleaning transforms"
    print_banner(banner_text)

    transform.delete_transforms()
Ejemplo n.º 18
0
def cmd_create_transform():
    """Transform the module-level ``sets`` for the configured dataset."""
    print_banner("Transforming sets")

    dataset_name = config.get_str('dataset', 'SelectedDataset')

    transform.transform_sets(dataset_name, sets, language)