def update_per_subtoken_statistics(self, results, true_positive, false_positive, false_negative):
    """Accumulate subtoken-level precision/recall counters over a batch of predictions.

    For each (original_name, top_words) pair, the best admissible prediction is
    split into subtokens and compared against the original name's subtokens:
    a predicted subtoken present in the original counts as a true positive,
    otherwise as a false positive; every original subtoken absent from the
    prediction counts as a false negative.

    :param results: iterable of (original_name, top_words) pairs.
    :param true_positive: running TP count to add to.
    :param false_positive: running FP count to add to.
    :param false_negative: running FN count to add to.
    :return: the updated (true_positive, false_positive, false_negative) triple.
    """
    for original_name, top_words in results:
        # Take the highest-ranked prediction that survives filtering.
        # NOTE(review): assumes filter_impossible_names never returns an empty
        # list here — confirm upstream guarantees, otherwise this raises IndexError.
        best_prediction = common.filter_impossible_names(top_words)[0]
        gold_subtokens = common.get_subtokens(original_name)
        pred_subtokens = common.get_subtokens(best_prediction)
        hits = sum(1 for tok in pred_subtokens if tok in gold_subtokens)
        true_positive += hits
        false_positive += len(pred_subtokens) - hits
        false_negative += sum(1 for tok in gold_subtokens if tok not in pred_subtokens)
    return true_positive, false_positive, false_negative
def update_correct_predictions(self, num_correct_predictions, output_file, results):
    """Update per-rank correct-prediction counters and log each prediction.

    For every (original_name, top_words) pair, writes the top prediction to
    ``output_file``; if some suggestion at rank i matches the (normalized)
    original name, increments the counters for every k >= i up to self.topk
    (a hit at rank i is also a hit for all larger k).

    :param num_correct_predictions: list of length >= self.topk; slot j counts
        names predicted correctly within the top (j + 1) suggestions.
    :param output_file: writable text file the log lines are appended to.
    :param results: iterable of (original_name, top_words) pairs.
    :return: the updated num_correct_predictions list (mutated in place).
    """
    for original_name, top_words in results:
        normalized_original_name = common.normalize_word(original_name)
        predicted_something = False
        for i, predicted_word in enumerate(common.filter_impossible_names(top_words)):
            if i == 0:
                output_file.write('Original: ' + original_name + ', predicted 1st: ' + predicted_word + '\n')
            predicted_something = True
            normalized_suggestion = common.normalize_word(predicted_word)
            if normalized_original_name == normalized_suggestion:
                output_file.write('\t\t predicted correctly at rank: ' + str(i + 1) + '\n')
                # A correct prediction at rank i counts for every top-k with k > i.
                for j in range(i, self.topk):
                    num_correct_predictions[j] += 1
                break
        if not predicted_something:
            # BUG FIX: the original write lacked a trailing '\n', so the next
            # log entry ran together with this message on the same line.
            output_file.write('No results for predicting: ' + original_name + '\n')
    return num_correct_predictions
def update_algorithm_dict(results, algorithm_dict):
    """Tally, per predicted main subtoken, how often it matches the original's.

    The "main" subtoken is the first subtoken of the name (e.g. 'get' in
    'getItemCount'). Each prediction updates the Algorithm entry keyed by its
    predicted main subtoken.

    :param results: iterable of (original_name, top_words) pairs.
    :param algorithm_dict: dict mapping main-subtoken -> Algorithm(name, tp, fn).
    :return: the updated algorithm_dict (mutated in place).
    """
    for original_name, top_words in results:
        # NOTE(review): assumes a non-empty filtered prediction list — IndexError otherwise.
        prediction = common.filter_impossible_names(top_words)[0]
        original_subtokens = common.get_subtokens(original_name)
        predicted_subtokens = common.get_subtokens(prediction)
        original_main = original_subtokens[0]
        predicted_main = predicted_subtokens[0]
        if predicted_main not in algorithm_dict:
            algorithm_dict[predicted_main] = Algorithm(predicted_main, 0, 0)
        # BUG FIX: the counting below used to live in the `else` branch of the
        # membership check above, so the first occurrence of each predicted
        # main subtoken was never counted. Count every occurrence instead.
        if predicted_main == original_main:
            algorithm_dict[predicted_main].true_positive += 1
        else:
            # NOTE(review): counted from the predicted side this is conceptually a
            # false positive; the Algorithm field is named false_negative — confirm
            # the intended semantics before renaming anything.
            algorithm_dict[predicted_main].false_negative += 1
    return algorithm_dict
def predict(self):
    """Interactively run BFS adversarial search against the model.

    Loops forever: lets the user edit 'Input.java', extracts its paths,
    prints the model's predictions, then asks for a variable to rename and an
    attack type ('nontargeted', or a target method name with '|'-separated
    subwords) and searches for an adversarial rename via gradient-guided BFS.
    Typing one of self.exit_keywords exits.
    """
    input_filename = 'Input.java'
    word_to_indextop, indextop_to_word = self.model.create_ordered_words_dictionary(
        self.model.get_data_dictionaries_path(self.config.LOAD_PATH),
        self.config.MAX_WORDS_FROM_VOCAB_FOR_ADVERSARIAL)
    print('Starting interactive prediction with BFS adversarial search... (depth = {}, topk = {})'
          .format(self.max_depth, self.topk))
    while True:
        print('Modify the file: "%s" and press any key when ready, or "q" / "quit" / "exit" to exit'
              % input_filename)
        user_input = input()
        if user_input.lower() in self.exit_keywords:
            print('Exiting...')
            return
        # NOTE(review): this read is effectively dead — original_code is
        # overwritten below — but it also fails fast (FileNotFoundError) if the
        # input file is missing, so it is kept.
        with open(input_filename, "r") as f:
            original_code = f.read()
        try:
            predict_lines, hash_to_string_dict = self.path_extractor.extract_paths(input_filename)
        except ValueError as e:
            print(e)
            continue
        # predict_lines[0] is "<vars> <code-contexts>"; strip the leading vars token.
        var_code_split_index = predict_lines[0].find(" ")
        original_code = predict_lines[0][var_code_split_index + 1:]
        results = self.model.predict([original_code])
        prediction_results = common.parse_results(results, hash_to_string_dict, topk=SHOW_TOP_CONTEXTS)
        for method_prediction in prediction_results:
            print('Original name:\t' + method_prediction.original_name)
            for name_prob_pair in method_prediction.predictions:
                print('\t(%f) predicted: %s' % (name_prob_pair['probability'], name_prob_pair['name']))
        # Generate PCA of the token embeddings for this sample.
        self.model.creat_PCA_tokens(predict_lines[0])

        # Search for adversarial examples.
        print("select variable to rename OR -- to skip search:")
        var_to_rename = input()
        if var_to_rename == "--":
            continue
        while True:
            print("select attack type: 'nontargeted' for non-targeted attack")
            print("OR target method name for targeted attack (each word is seperated by |)")
            attack_type = input()
            if attack_type == "nontargeted":
                # Untargeted searcher: any prediction change counts as success.
                print("Using non-targeted attack")
                searcher = AdversarialSearcher(self.topk, self.max_depth, word_to_indextop,
                                               indextop_to_word, predict_lines[0],
                                               lambda c, v: [(var_to_rename, var_to_rename)])
                break
            else:
                # Targeted searcher: success means predicting attack_type exactly.
                if attack_type in self.model.target_word_to_index:
                    print("Using targeted attack. target:", attack_type)
                    searcher = AdversarialTargetedSearcher(self.topk, self.max_depth,
                                                           word_to_indextop, indextop_to_word,
                                                           predict_lines[0], attack_type,
                                                           lambda c, v: [(var_to_rename, var_to_rename)])
                    break
                print(attack_type, "not existed in vocab! try again")
        adversarial_results = []
        while True:
            # Evaluate every not-yet-checked candidate rename in one batch.
            batch_nodes_data = [(n, c) for n, c in searcher.pop_unchecked_adversarial_code()]
            batch_data = [c for _, c in batch_nodes_data]
            results = self.model.predict(batch_data, self.guard_input)
            for (node, _), res in zip(batch_nodes_data, results):
                one_top_words = res[1]
                one_top_words = common.filter_impossible_names(one_top_words)
                if not one_top_words:
                    print("code with state: " + str(node) + " cause empty predictions\n")
                    continue
                if searcher.is_target_found(one_top_words):
                    adversarial_results.append((one_top_words[0], node))
            if adversarial_results and not self.multiple_results:
                break
            # Expand the search frontier using gradients w.r.t. the renamed word.
            batch_data = [searcher.get_adversarial_code()]
            batch_word_to_derive = [searcher.get_word_to_derive()]
            loss, all_grads = self.model.calc_loss_and_gradients_wrt_input(
                batch_data, batch_word_to_derive, indextop_to_word)
            if not searcher.next((0, "", all_grads[0])):
                break
        # BUG FIX: the original tested `if not results:` (the last model batch),
        # which does not indicate search failure; report failure when no
        # adversarial replacement was actually found.
        if not adversarial_results:
            print("FAILD! no replaces found")
        else:
            print("variable replaces:")
            print("Prediction\tnode")
            for r in adversarial_results:
                print(r[0], "\t", r[1])
def predict(self):
    """Batch-run adversarial searches over every sample in test_adversarial/src.

    For each .java file: extract paths, skip methods with no variables or whose
    name the model already predicts wrong, then for every target in ``targets``
    and every variable in the method, run a gradient-guided BFS searcher
    (targeted or non-targeted) and print any adversarial renames found.
    """
    input_filename = 'Input.java'
    # MAX_ATTEMPTS = 50
    # MAX_NODES_TO_OPEN = 10
    src_folder = "test_adversarial/src"
    # input_src = ["contains.java", "count.java", "done.java", "escape.java", "factorial.java", "get.java",
    #              "indexOf.java", "isPrime.java", "postRequest.java", "reverseArray.java", "sort.java"]
    input_src = os.listdir(src_folder)
    # Target method names to attack toward; subwords are '|'-separated, plus
    # the special pseudo-target "nontargeted" is handled below when present.
    targets = [
        "sort", "contains", "get", "index|of", "done", "reverse|array", "count",
        "is|prime", "post|request", "escape", "add", "close", "main", "max",
        "min", "factorial", "load", "foo", "update", "bar", "exception", "test",
        "swap", "predict"
    ]
    word_to_indextop, indextop_to_word = self.model.create_ordered_words_dictionary(
        self.model.get_data_dictionaries_path(self.config.LOAD_PATH),
        self.config.MAX_WORDS_FROM_VOCAB_FOR_ADVERSARIAL)
    print(
        'Starting interactive prediction with BFS adversarial search... (depth = {}, topk = {})'
        .format(self.max_depth, self.topk))
    for src in input_src:
        print('SAMPLE: ', src)
        input_filename = src_folder + "/" + src
        try:
            predict_lines, hash_to_string_dict = self.path_extractor.extract_paths(
                input_filename)
        except ValueError as e:
            print(e)
            continue
        # predict_lines[0] is "<vars> <code-contexts>"; split into the two parts.
        var, original_code = common_adversarial.separate_vars_code(
            predict_lines[0])
        # ignore methods without vars
        if not common_adversarial.get_all_vars(var):
            print("NO VARS. skip.")
            continue
        results = self.model.predict([original_code])
        prediction_results = common.parse_results(results, hash_to_string_dict,
                                                  topk=SHOW_TOP_CONTEXTS)
        # skip method that were predicted wrong
        # (compares lowercased original name to the concatenated predicted subwords)
        method_prediction = prediction_results[0]
        if method_prediction.original_name.lower() != "".join(
                method_prediction.predictions[0]['name']):
            print("WRONG PREDICTION. skip. (true: {}, pred: {})".format(
                method_prediction.original_name,
                method_prediction.predictions))
            continue
        for method_prediction in prediction_results:
            print('Original name:\t' + method_prediction.original_name)
            for name_prob_pair in method_prediction.predictions:
                print('\t(%f) predicted: %s' % (name_prob_pair['probability'],
                                                name_prob_pair['name']))
        # Search for adversarial examples
        print("ADVERSARIAL results:")
        for target in targets:
            print("TARGET:", target)
            if target != "nontargeted" and target not in self.model.target_word_to_index:
                print("target not exist. skip.")
                continue
            # Try renaming each variable of the method independently.
            for var_to_rename in common_adversarial.get_all_vars(var):
                # untargeted searcher
                if target == "nontargeted":
                    searcher = AdversarialSearcher(
                        self.topk, self.max_depth, word_to_indextop,
                        indextop_to_word, predict_lines[0],
                        lambda c, v: [(var_to_rename, var_to_rename)])
                else:
                    # targeted searcher
                    searcher = AdversarialTargetedSearcher(
                        self.topk, self.max_depth, word_to_indextop,
                        indextop_to_word, predict_lines[0], target,
                        lambda c, v: [(var_to_rename, var_to_rename)])
                adversarial_results = []
                while True:
                    # Evaluate all unchecked candidate renames as one batch.
                    batch_nodes_data = [
                        (n, c)
                        for n, c in searcher.pop_unchecked_adversarial_code()
                    ]
                    batch_data = [c for _, c in batch_nodes_data]
                    results = self.model.predict(batch_data, self.guard_input)
                    for (node, _), res in zip(batch_nodes_data, results):
                        one_top_words = res[1]
                        one_top_words = common.filter_impossible_names(
                            one_top_words)
                        if not one_top_words:
                            print("code with state: " + str(node) +
                                  " cause empty predictions\n")
                            continue
                        if searcher.is_target_found(one_top_words):
                            adversarial_results.append(
                                (one_top_words[0], node))
                    if adversarial_results and not self.multiple_results:
                        break
                    # Expand the BFS frontier using gradients w.r.t. the renamed word.
                    batch_data = [searcher.get_adversarial_code()]
                    batch_word_to_derive = [searcher.get_word_to_derive()]
                    loss, all_grads = self.model.calc_loss_and_gradients_wrt_input(
                        batch_data, batch_word_to_derive, indextop_to_word)
                    if not searcher.next((0, "", all_grads[0])):
                        break
                for r in adversarial_results:
                    print(r[0], "\t\t\t", r[1])