def cmd_display_histo():
    """Display probability histograms for both classes of the current model."""
    print_banner("Displaying histogram")
    global X_test, Y_test
    model_type = config.get_str('model', 'Model')
    dataset_name = config.get_str('dataset', 'SelectedDataset')
    # Lazily load the test split only if no earlier command cached it.
    if X_test is None or Y_test is None:
        X_test, Y_test = transform.get_xy(dataset_name, 'testing_set',
                                          language, vuln_type,
                                          selected_features)
        X_test = sync_features(X_test)
    # One histogram per class (0 = not vulnerable, 1 = vulnerable).
    display_prob_histogram(
        title="%s %s (class: not vulnerable)" % (vuln_type, model_type),
        model=model, X=X_test, Y=Y_test, cls=0)
    display_prob_histogram(
        title="%s %s (class: vulnerable)" % (vuln_type, model_type),
        model=model, X=X_test, Y=Y_test, cls=1)
def main(args=None):
    """Entry point: parse the comma-separated command list and run it.

    Args:
        args: Optional argument list; defaults to ``sys.argv[1:]``.
    """
    global language, vuln_type
    if args is None:
        args = sys.argv[1:]
    if not args:
        print_help()
        # Fix: without this return, args[0] below raises IndexError when no
        # arguments are given. If print_help() happens to exit the process,
        # the return is harmless.
        return
    # Remove all spaces before splitting the comma-separated command list.
    command_line = args[0].replace(" ", "")
    commands = command_line.split(",")
    # --- Initialize configuration ---
    config.init()
    language = config.get_str('dataset', 'SelectedLanguage')
    vuln_type = config.get_str('dataset', 'SelectedVulnerabilityType')
    np.set_printoptions(precision=3, suppress=True)
    # --- Run commands ---
    run_commands(commands)
    # --- Final clean up for some models ---
    if hasattr(model, 'clean_up'):
        print_banner("Cleaning up")
        model.clean_up()
def cmd_store_outliers():
    """Persist outlier predictions for the chosen (or custom) test set."""
    print_banner("Store outliers")
    global model
    threshold = 0.5
    if config.get_boolean('analysis', 'UseCustomTestSet'):
        print_notice("Creating a custom test set")
        sel_ds = 'Custom'
        threshold = 0.0
        custom_sets = dataset_factory.get_dataset(sel_ds).get_sets()
        transform.transform_sets(sel_ds, custom_sets, language)
        # TODO Delete transforms and data set
        #dataset_factory.get_dataset(sel_ds).delete_sets()
    else:
        sel_ds = config.get_str('dataset', 'SelectedDataset')
    # Both branches read the testing split of whichever set was selected.
    orig, X, Y = transform.get_xy_with_orig(sel_ds, 'testing_set', language,
                                            vuln_type, selected_features)
    X = sync_features(X)
    data.store_data(model, orig, X, Y, just_outliers=True,
                    threshold=threshold)
def cmd_clean_custom():
    """Remove the custom data set and its cached transforms."""
    print_banner("Cleaning custom set")
    custom_ds = 'Custom'
    dataset_factory.get_dataset(custom_ds).delete_sets()
    transform.delete_transforms([custom_ds])
def cmd_create_set():
    """Build the data sets for the configured data set, caching them globally."""
    print_banner("Building sets")
    global sets
    dataset_name = config.get_str('dataset', 'SelectedDataset')
    sets = dataset_factory.get_dataset(dataset_name).get_sets()
def cmd_create_features():
    """Derive the popular features from the built sets and cache them globally."""
    print_banner("Creating features")
    global popular_features
    dataset_name = config.get_str('dataset', 'SelectedDataset')
    popular_features = transform.create_popular_features(dataset_name, sets,
                                                         language)
def cmd_select_features():
    """Select features from the training split and cache them globally."""
    print_banner("Selecting features")
    global selected_features
    dataset_name = config.get_str('dataset', 'SelectedDataset')
    features, labels = transform.get_xy(dataset_name, 'training_set',
                                        language, vuln_type)
    selected_features = train.select_features(features, labels)
def cmd_test_model():
    """Evaluate the current model on the testing split and print metrics."""
    print_banner("Testing model")
    global X_test, Y_test
    dataset_name = config.get_str('dataset', 'SelectedDataset')
    X_test, Y_test = transform.get_xy(dataset_name, 'testing_set', language,
                                      vuln_type, selected_features)
    X_test = sync_features(X_test)
    print_metrics(model=model, X=X_test, Y=Y_test)
def cmd_store_all():
    """Persist predictions for every sample of the testing split."""
    print_banner("Store all")
    global model
    dataset_name = config.get_str('dataset', 'SelectedDataset')
    orig, features, labels = transform.get_xy_with_orig(
        dataset_name, 'testing_set', language, vuln_type, selected_features)
    features = sync_features(features)
    data.store_data(model, orig, features, labels, just_outliers=False)
def cmd_create_model():
    """Train a model on the training split, recording its feature order."""
    print_banner("Creating model")
    global model, train_features
    dataset_name = config.get_str('dataset', 'SelectedDataset')
    X, Y = transform.get_xy(dataset_name, 'training_set', language,
                            vuln_type, selected_features)
    # Keep a stable column order so other sets can later be aligned to it.
    X.sort_index(axis=1, inplace=True)
    if train_features is None:
        train_features = X.columns
    model = train.select_model(language, vuln_type, X, Y)
def cmd_store_custom():
    """Build the custom test set and persist its outlier predictions."""
    print_banner("Store custom test set results")
    global model
    print_notice("Creating a custom test set")
    custom_ds = 'Custom'
    custom_sets = dataset_factory.get_dataset(custom_ds).get_sets()
    transform.transform_sets(custom_ds, custom_sets, language)
    orig, X, Y = transform.get_xy_with_orig(custom_ds, 'testing_set',
                                            language, vuln_type,
                                            selected_features)
    X = sync_features(X)
    # NOTE(review): threshold=0.0 presumably keeps all flagged rows — confirm
    # against data.store_data.
    data.store_data(model, orig, X, Y, just_outliers=True, threshold=0.0)
def cmd_display_model():
    """Display the precision/recall curve for the current model."""
    print_banner("Displaying model")
    global X_test, Y_test
    model_type = config.get_str('model', 'Model')
    dataset_name = config.get_str('dataset', 'SelectedDataset')
    # Reuse the cached test split if an earlier command already loaded it.
    if X_test is None or Y_test is None:
        X_test, Y_test = transform.get_xy(dataset_name, 'testing_set',
                                          language, vuln_type,
                                          selected_features)
        X_test = sync_features(X_test)
    display_pr_curve(title="%s %s" % (vuln_type, model_type),
                     model=model, X=X_test, Y=Y_test)
def cmd_tune_params():
    """Run hyper-parameter tuning on the training and tuning splits.

    NOTE(review): the return value of train.select_best_model is discarded
    and ``global model`` is declared but never assigned here — confirm
    whether the tuned model is meant to replace the global ``model``.
    """
    print_banner("Tuning model parameters")
    global model, train_features
    sel_ds = config.get_str('dataset', 'SelectedDataset')
    X, Y = transform.get_xy(sel_ds, 'training_set', language, vuln_type, selected_features)
    # Stable column order; train_features records it for later alignment.
    X.sort_index(axis=1, inplace=True)
    if train_features is None:
        train_features = X.columns
    X_tuning, Y_tuning = transform.get_xy(sel_ds, 'tuning_set', language, vuln_type, selected_features)
    X_tuning = sync_features(X_tuning)
    train.select_best_model(X, Y, X_tuning, Y_tuning)
def cmd_filter_features():
    """Drop selected features whose name starts with the configured prefix.

    Reads ``model.FeatureFilterStartString`` from the configuration, removes
    matching names from the global ``selected_features``, and prints the
    surviving features numbered from 1.
    """
    print_banner("Filtering features")
    global selected_features
    start_string = config.get_str('model', 'FeatureFilterStartString')
    # Fall back to all training-set columns when no selection was made yet.
    if selected_features is None:
        sel_ds = config.get_str('dataset', 'SelectedDataset')
        X, Y = transform.get_xy(sel_ds, 'training_set', language, vuln_type)
        selected_features = X.columns.values
    selected_features = [
        feature for feature in selected_features
        if not feature.startswith(start_string)
    ]
    # enumerate replaces the original hand-rolled counter.
    for n, feature in enumerate(selected_features, start=1):
        print_notice("%d. %s" % (n, feature))
def cmd_compare_tools():
    """Compare the model's test-split results against configured external tools."""
    global train_features
    print_banner("Comparing results")
    dataset_name = config.get_str('dataset', 'SelectedDataset')
    vt = config.get_str('dataset', 'SelectedVulnerabilityType')
    # Ensure the training feature order is known so sets can be synced to it.
    if train_features is None:
        X_train, _ = transform.get_xy(dataset_name, 'training_set', language,
                                      vuln_type, selected_features)
        X_train.sort_index(axis=1, inplace=True)
        train_features = X_train.columns
    # Pick the decision threshold using the tuning split.
    orig_tuning, X_tuning, _ = transform.get_xy_with_orig(
        dataset_name, 'tuning_set', language, vuln_type, selected_features)
    X_tuning = sync_features(X_tuning)
    c = find_best_threshold(model, orig_tuning, X_tuning)
    print_notice("Preferred threshold (Y > c): %.2f" % c)
    orig, X, _ = transform.get_xy_with_orig(dataset_name, 'testing_set',
                                            language, vuln_type,
                                            selected_features)
    print_notice('-' * 55)
    print_notice("Our results")
    print_model_results(model, orig, X, c)
    # One comparison section per tool listed in the [tools] config section.
    for tool, file_name in config.get_items('tools'):
        print_notice('-' * 55)
        print_notice('Comparing against tool: %s' % tool)
        compare_results(file_name, orig, vt)
def cmd_clean_set():
    """Delete the generated sets of the configured data set."""
    print_banner("Cleaning sets")
    dataset_name = config.get_str('dataset', 'SelectedDataset')
    dataset_factory.get_dataset(dataset_name).delete_sets()
def cmd_clean_transform():
    """Delete all cached transforms."""
    print_banner("Cleaning transforms")
    transform.delete_transforms()
def cmd_create_transform():
    """Transform the previously built sets for the configured data set."""
    print_banner("Transforming sets")
    dataset_name = config.get_str('dataset', 'SelectedDataset')
    transform.transform_sets(dataset_name, sets, language)