def word_tokenize(batch_data, tokenized_data, js_compile):
    """Tokenize every snippet in ``batch_data`` and collect the token values.

    Each snippet is passed to ``js_compile.call('tokenize', ...)``. The
    ``value`` of every returned token is recorded, and a token whose ``type``
    is ``Keyword`` or ``Numeric`` is additionally followed by a ``'▁'`` marker.

    Args:
        batch_data: Iterable of source-code strings to tokenize.
        tokenized_data: List that receives one token list per snippet; it is
            appended to in place.
        js_compile: Object whose ``call('tokenize', code)`` returns an
            iterable of tokens supporting ``.get('type')`` / ``.get('value')``.

    Returns:
        The same ``tokenized_data`` list that was passed in.
    """
    marked_types = ('Keyword', 'Numeric')
    for snippet in tqdm(batch_data, ncols=80):
        line_tokens = []
        for tok in js_compile.call('tokenize', snippet):
            tok_type = tok.get('type')
            tok_value = tok.get('value')
            # The value is always kept; only the trailing marker is conditional.
            line_tokens.append(tok_value)
            if tok_type in marked_types:
                line_tokens.append('▁')
        tokenized_data.append(line_tokens)
    return tokenized_data
def get_mutation_points(code_str: str, js_compile) -> List[int]:
    """Select mutation points in ``code_str``.

    The points are normally obtained from the syntax tree via
    ``js_compile.call('get_mutation_points', code_str)``. If that call raises,
    positions are derived from the string length instead: one third, one half
    and three random indexes.

    Args:
        code_str: Syntactically valid JS code to be mutated.
        js_compile: Object exposing ``call(name, *args)``; the
            ``get_mutation_points`` call is expected to return a list of
            indexes.

    Returns:
        A shuffled list of at most five mutation-point indexes. The list is
        empty when the fallback is used on a string shorter than two
        characters.
    """
    try:
        # Preferred path: pick suitable mutation points from the syntax tree.
        point_indexes = js_compile.call('get_mutation_points', code_str)
    except Exception:
        # Syntax-tree parsing failed: fall back to length-based/random points.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        point_indexes = []
        length = len(code_str)
        # randint(1, length - 1) needs length >= 2, so everything stays
        # behind this guard.
        if length > 1:
            point_indexes = [length // 3, length // 2]
            point_indexes.extend(
                random.randint(1, length - 1) for _ in range(3)
            )
    # Return at most 5 mutation points, in random order.
    random.shuffle(point_indexes)
    return point_indexes[:5]
def check():
    """Verify the runtime environment before fuzzing starts.

    Three checks run in order, each reporting its result on stdout:

    1. Node and the npm packages: a sample snippet is sent to
       ``js_compile.call('get_mutation_points', ...)``.
    2. Database: ``DataBase(hparams.seed_pool_url).test_db_connect()``.
    3. Engines under test: ``Fuzzer(hparams.engines, hparams.timeout)`` is
       constructed.

    Exceptions raised while constructing ``DataBase`` or ``Fuzzer`` are not
    caught here and propagate to the caller.

    Raises:
        SystemExit: With status 1 if check 1 or check 2 fails.
    """
    # Local import keeps this block self-contained; ``sys.exit`` is used
    # instead of the ``site``-provided ``exit`` helper, which is not
    # guaranteed to exist (e.g. ``python -S``).
    import sys

    # 1. Verify that node and the npm packages are installed.
    print("正在进行运行环境验证,请稍等...")
    try:
        js_compile.call('get_mutation_points', "print('hello world');")
    except Exception:
        print("[1]node以及npm包安装有误,请检查后重试.")
        sys.exit(1)
    print("[1]node以及npm包安装正确.")

    # 2. Verify that the database connection works.
    database = DataBase(hparams.seed_pool_url)
    try:
        database.test_db_connect()
    except Exception:
        print("[2]数据库连接异常,请检查后重试.")
        # Abort like check 1 does; otherwise the success banner below would
        # be printed even though this check failed.
        sys.exit(1)
    print("[2]数据库连接正确.")

    # 3. Verify that the engines under test are usable (constructing the
    #    fuzzer is the check; the instance itself is not needed).
    Fuzzer(hparams.engines, hparams.timeout)
    print("[3]待测引擎配置正确.")
    print("所有环境检查无误,即将开始Fuzzing.")
code_str = code_str.replace('WScript.Echo', 'print') except: no_pass_list.append(file) continue # step2.通用性过滤 fuzzing_result = fuzzer.run_testcase_multi_threads(code_str) # 假如通过,则写入到target_dir中 if fuzzing_result.is_all_pass(): id = str(uuid4()).replace('-', '')[:16] file_name = os.path.join(target_dir, f'original_{id}.js') # step3.基于AST做移除注释和语法过滤 try: code_str = js_compile.call('remove_comment', code_str) except: remove_comment_error_count += 1 continue # step4.统一空格形式后去重,并按长度进行过滤 code_str = re.sub(' +', ' ', code_str.strip().replace('\n', ' ').replace('\t', ' ')) if 0 < len(code_str) < 100000: continue # 写入文件 write_file(file_name, code_str) pass_count += 1 else: no_pass_list.append(file)