def cmd_display_histo():
    """Show the per-class histograms drawn by ``display_prob_histogram``.

    Reuses the module-level ``X_test``/``Y_test`` when both are already set.
    Otherwise the 'testing_set' split of the configured dataset is loaded,
    its features are passed through ``sync_features``, and both globals are
    stored for later commands.

    ``display_prob_histogram`` is then called twice: first for class 0
    (titled "not vulnerable"), then for class 1 (titled "vulnerable").
    """
    global X_test, Y_test

    print_banner("Displaying histogram")

    model_name = config.get_str('model', 'Model')
    dataset = config.get_str('dataset', 'SelectedDataset')

    # Load the testing split lazily; an earlier command may have cached it.
    if X_test is None or Y_test is None:
        X_test, Y_test = transform.get_xy(dataset, 'testing_set', language,
                                          vuln_type, selected_features)
        X_test = sync_features(X_test)

    # One histogram per class label, class 0 first.
    title_formats = (
        (0, "%s %s (class: not vulnerable)"),
        (1, "%s %s (class: vulnerable)"),
    )
    for label, title_format in title_formats:
        display_prob_histogram(title=title_format % (vuln_type, model_name),
                               model=model, X=X_test, Y=Y_test, cls=label)
def cmd_select_features():
    """Run feature selection on the training set and remember the result.

    Loads the 'training_set' split of the configured dataset (without
    passing a feature filter) and stores whatever ``train.select_features``
    returns in the module-level ``selected_features``.
    """
    global selected_features

    print_banner("Selecting features")

    dataset = config.get_str('dataset', 'SelectedDataset')
    features, labels = transform.get_xy(dataset, 'training_set', language, vuln_type)

    selected_features = train.select_features(features, labels)
def cmd_calibrate_model():
    """Wrap the current model in an isotonic calibrator fitted on the tuning set.

    Loads the 'tuning_set' split of the configured dataset, passes its
    features through ``sync_features``, and fits a ``CalibratedClassifierCV``
    (``method='isotonic'``, ``cv='prefit'``) around the module-level
    ``model``. The global ``model`` is replaced by the calibrated wrapper
    only after fitting has succeeded, so a failure during loading or fitting
    leaves the previous model untouched.
    """
    global model

    sel_ds = config.get_str('dataset', 'SelectedDataset')

    X, Y = transform.get_xy(sel_ds, 'tuning_set', language, vuln_type, selected_features)
    X = sync_features(X)

    # NOTE(review): cv='prefit' is reported as deprecated in recent
    # scikit-learn releases -- confirm against the pinned version.
    calibrated = CalibratedClassifierCV(model, method='isotonic', cv='prefit')
    calibrated.fit(X, Y)

    # Publish only once fit() has returned: assigning before fitting would
    # leave an unfitted wrapper in the global if fit() raised.
    model = calibrated
def cmd_tune_params():
    """Run ``train.select_best_model`` on the training and tuning sets.

    The training features are column-sorted in place. If the module-level
    ``train_features`` is still unset, it is initialised from the sorted
    training columns. The tuning features are passed through
    ``sync_features`` before both splits are handed to
    ``train.select_best_model``; its return value is not used here.
    """
    global model, train_features

    print_banner("Tuning model parameters")

    dataset = config.get_str('dataset', 'SelectedDataset')

    X_training, Y_training = transform.get_xy(dataset, 'training_set', language,
                                              vuln_type, selected_features)
    X_training.sort_index(axis=1, inplace=True)

    # Remember the training column layout the first time it is seen.
    if train_features is None:
        train_features = X_training.columns

    X_tuning, Y_tuning = transform.get_xy(dataset, 'tuning_set', language,
                                          vuln_type, selected_features)
    X_tuning = sync_features(X_tuning)

    train.select_best_model(X_training, Y_training, X_tuning, Y_tuning)
def cmd_test_model():
    """Print metrics for the current model on the testing set.

    Always reloads the 'testing_set' split of the configured dataset,
    passes its features through ``sync_features``, stores the result in the
    module-level ``X_test``/``Y_test``, and hands them to ``print_metrics``
    together with the module-level ``model``.
    """
    global X_test, Y_test

    print_banner("Testing model")

    dataset = config.get_str('dataset', 'SelectedDataset')

    X_test, Y_test = transform.get_xy(dataset, 'testing_set', language,
                                      vuln_type, selected_features)
    X_test = sync_features(X_test)

    print_metrics(model=model, X=X_test, Y=Y_test)
def cmd_count_sets():
    """Print how many labels equal 0 and 1 in each split, plus the totals.

    Loads the labels of the 'training_set', 'tuning_set' and 'testing_set'
    splits (no feature filter is passed) before anything is printed. It then
    reports, per split and overall, the number of labels equal to 0
    ("non-vulnerable lines") and equal to 1 ("vulnerable lines").
    """
    dataset = config.get_str('dataset', 'SelectedDataset')

    # Load every split first so all loading happens before the report.
    splits = (
        ('training', 'training_set'),
        ('tuning', 'tuning_set'),
        ('testing', 'testing_set'),
    )
    labels_per_split = []
    for split_name, split_key in splits:
        _, labels = transform.get_xy(dataset, split_key, language, vuln_type, None)
        labels_per_split.append((split_name, labels))

    total_non_vuln = 0
    total_vuln = 0

    for split_name, labels in labels_per_split:
        non_vuln_count = len(labels.loc[labels[0:] == 0])
        vuln_count = len(labels.loc[labels[0:] == 1])

        total_non_vuln += non_vuln_count
        total_vuln += vuln_count

        print_notice("%s set: non-vulnerable lines %d, vulnerable lines %d"
                     % (split_name, non_vuln_count, vuln_count))

    print_notice("total: non-vulnerable lines %d, vulnerable lines %d"
                 % (total_non_vuln, total_vuln))
def cmd_create_model():
    """Build the model from the training set and store it globally.

    The training features are column-sorted in place. If the module-level
    ``train_features`` is still unset, it is initialised from the sorted
    training columns. The result of ``train.select_model`` becomes the
    module-level ``model``.
    """
    global model, train_features

    print_banner("Creating model")

    dataset = config.get_str('dataset', 'SelectedDataset')

    X_training, Y_training = transform.get_xy(dataset, 'training_set', language,
                                              vuln_type, selected_features)
    X_training.sort_index(axis=1, inplace=True)

    # Remember the training column layout the first time it is seen.
    if train_features is None:
        train_features = X_training.columns

    model = train.select_model(language, vuln_type, X_training, Y_training)
def cmd_display_model():
    """Show the curve drawn by ``display_pr_curve`` for the current model.

    Reuses the module-level ``X_test``/``Y_test`` when both are already set.
    Otherwise the 'testing_set' split of the configured dataset is loaded,
    its features are passed through ``sync_features``, and both globals are
    stored for later commands.
    """
    global X_test, Y_test

    print_banner("Displaying model")

    model_name = config.get_str('model', 'Model')
    dataset = config.get_str('dataset', 'SelectedDataset')

    # Load the testing split lazily; an earlier command may have cached it.
    if X_test is None or Y_test is None:
        X_test, Y_test = transform.get_xy(dataset, 'testing_set', language,
                                          vuln_type, selected_features)
        X_test = sync_features(X_test)

    curve_title = "%s %s" % (vuln_type, model_name)
    display_pr_curve(title=curve_title, model=model, X=X_test, Y=Y_test)
def cmd_filter_features():
    """Drop features whose name starts with the configured prefix and list the rest.

    The prefix is read from the 'FeatureFilterStartString' option of the
    'model' section. When no features have been selected yet, the column
    names of the 'training_set' split are used as the starting point. The
    filtered list is stored in the module-level ``selected_features`` and
    printed as a numbered list starting at 1.
    """
    global selected_features

    print_banner("Filtering features")

    excluded_prefix = config.get_str('model', 'FeatureFilterStartString')

    # Fall back to every training column when nothing has been selected yet.
    if selected_features is None:
        dataset = config.get_str('dataset', 'SelectedDataset')
        X, _ = transform.get_xy(dataset, 'training_set', language, vuln_type)
        selected_features = X.columns.values

    selected_features = [
        name for name in selected_features
        if not name.startswith(excluded_prefix)
    ]

    for position, name in enumerate(selected_features, start=1):
        print_notice("%d. %s" % (position, name))
def cmd_compare_tools():
    """Report the model's results and compare them against the configured tools.

    Steps:

    1. If the module-level ``train_features`` is unset, initialise it from
       the column-sorted training set.
    2. Load the 'tuning_set' split, pass its features through
       ``sync_features``, and obtain a threshold from
       ``find_best_threshold``.
    3. Load the 'testing_set' split, pass its features through
       ``sync_features`` as well, and print the model's results at that
       threshold.
    4. For every ``(tool, file_name)`` item in the 'tools' config section,
       call ``compare_results`` on the same testing-set originals.
    """
    global train_features

    print_banner("Comparing results")

    sel_ds = config.get_str('dataset', 'SelectedDataset')
    sel_vt = config.get_str('dataset', 'SelectedVulnerabilityType')

    # Remember the training column layout if no earlier command has done so.
    if train_features is None:
        X_training, _ = transform.get_xy(sel_ds, 'training_set', language,
                                         vuln_type, selected_features)
        X_training.sort_index(axis=1, inplace=True)
        train_features = X_training.columns

    orig_tuning, X_tuning, _ = transform.get_xy_with_orig(
        sel_ds, 'tuning_set', language, vuln_type, selected_features)
    X_tuning = sync_features(X_tuning)

    c = find_best_threshold(model, orig_tuning, X_tuning)
    print_notice("Preferred threshold (Y > c): %.2f" % c)

    orig, X_testing, _ = transform.get_xy_with_orig(
        sel_ds, 'testing_set', language, vuln_type, selected_features)
    # Sync the testing features like every other input given to the model
    # (the tuning set above, and the test/display commands); previously the
    # raw testing frame was passed straight to print_model_results.
    X_testing = sync_features(X_testing)

    print_notice('-' * 55)
    print_notice("Our results")
    print_model_results(model, orig, X_testing, c)

    for (tool, file_name) in config.get_items('tools'):
        print_notice('-' * 55)
        print_notice('Comparing against tool: %s' % tool)
        compare_results(file_name, orig, sel_vt)