def __init__(self):
    """Parse the command-line options and prepare the input rows and output database.

    Loads the corpus rows through ``readFunctions``, opens and initialises
    the database named by ``hparams.jshint_db``, and caps the number of
    worker threads at 32.
    """
    self.hparams = Hparams_generateQualify().parser.parse_args()
    self.functions = self.readFunctions()

    # Database that will receive the rows produced later on.
    jshint_db = DBOperation(self.hparams.jshint_db)
    self.insert_db = jshint_db
    self.insert_db.init_db()

    # Never use more than 32 threads, whatever was requested.
    thread_cap = 32
    requested = self.hparams.n_threads
    self.n_threads = requested if requested <= thread_cap else thread_cap
class functionExtraction:
    """Split one batch of rows across worker threads and store the unique results.

    The rows are cut into ``n_threads`` consecutive groups, one
    ``extractionThread`` is started per group, and the first field of every
    result row is written (de-duplicated) to the ``Content`` column of the
    target database.
    """

    def __init__(self, contents, target_db_path, n_threads):
        """Partition *contents* and open the target database.

        Args:
            contents: Sequence of rows to process; may be empty.
            target_db_path: Path handed to ``DBOperation`` for the output.
            n_threads: Number of worker threads; must be a positive integer.
        """
        self.n_threads = n_threads
        self.contents = contents
        total = len(contents)
        # Keep the chunk size at 1 or more: for an empty batch the ceil is 0
        # and range() raises ValueError when its step is 0.
        group_size = max(1, int(math.ceil(total / n_threads)))
        self.groups = [
            self.contents[start:start + group_size]
            for start in range(0, total, group_size)
        ]
        # Ceil-sized chunks can yield fewer than n_threads groups (e.g. 5 rows
        # over 4 threads gives 3 groups). Pad with empty groups so that every
        # worker index 0..n_threads-1 has an entry.
        # NOTE(review): presumably extractionThread reads groups[i] - confirm.
        self.groups.extend([] for _ in range(n_threads - len(self.groups)))
        self.target_db_op = DBOperation(target_db_path)
        self.threads = []
        self.result_batches = [[] for _ in range(n_threads)]

    def execute(self):
        """Run all workers, wait for them, then insert the de-duplicated results."""
        for index in range(self.n_threads):
            worker = extractionThread(index, self.groups, self.result_batches)
            self.threads.append(worker)
            worker.start()
        for worker in self.threads:
            worker.join()

        results = []
        for batch in self.result_batches:
            results.extend(batch)

        # Use a set to filter out test cases that are exact duplicates.
        unique_callables = {row[0] for row in results}
        results = [[item] for item in unique_callables]
        self.target_db_op.insert(['Content'], results)
        self.target_db_op.finalize()
def corpus_collection(self, corpus_path):
    """Collect every ``.js`` file under *corpus_path* into a date-stamped database.

    Each non-empty file (decoded as UTF-8, undecodable bytes ignored,
    surrounding whitespace stripped) becomes one ``Content`` row. Rows are
    flushed to the database whenever more than 10000 are buffered, and once
    more at the end.

    Collection is best effort: a file that cannot be processed is skipped,
    but the failure is reported on stderr instead of being silently dropped.

    Args:
        corpus_path: Directory that is walked recursively; the database file
            ``corpus-YYYYMMDD.db`` is created inside it.

    Returns:
        The path of the database file that was written.
    """
    time_stamped_db_name = os.path.join(
        corpus_path,
        'corpus-' + time.strftime("%Y%m%d", time.localtime()) + '.db')
    db_op = DBOperation(time_stamped_db_name)
    db_op.init_db()
    corpus = []
    total = 0
    for root, _dirs, files in os.walk(corpus_path):
        for file_name in files:
            if not file_name.lower().endswith('.js'):
                continue
            file_path = os.path.join(root, file_name)
            try:
                total += 1
                sys.stdout.write("\rProcessing: %d" % total)
                with open(file_path, 'rb') as f:
                    content = f.read().decode('utf-8', 'ignore').strip()
                if content:
                    corpus.append([content])
                if len(corpus) > 10000:
                    db_op.insert(['Content'], corpus)
                    corpus.clear()
            except Exception as exc:
                # Keep going, but make the failure visible to the operator.
                sys.stderr.write("\nSkipping %s: %s\n" % (file_path, exc))
    db_op.insert(['Content'], corpus)
    db_op.finalize()
    return time_stamped_db_name
def db2list(db_path: str) -> typing.List[str]:
    """Read the ``Content`` column from a single database file.

    Returns the first field of every row with surrounding whitespace
    stripped.
    """
    reader = DBOperation(db_path)
    rows = reader.query_all(['Content'])
    reader.finalize()
    return [row[0].strip() for row in rows]
def __init__(self, contents, target_db_path, n_threads):
    """Partition *contents* into per-thread groups and open the target database.

    Args:
        contents: Sequence of rows to process; may be empty.
        target_db_path: Path handed to ``DBOperation`` for the output.
        n_threads: Number of worker threads; must be a positive integer.
    """
    self.n_threads = n_threads
    self.contents = contents
    total = len(contents)
    # Keep the chunk size at 1 or more: for an empty batch the ceil is 0 and
    # range() raises ValueError when its step is 0.
    group_size = max(1, int(math.ceil(total / n_threads)))
    self.groups = [
        self.contents[start:start + group_size]
        for start in range(0, total, group_size)
    ]
    # Ceil-sized chunks can yield fewer than n_threads groups (e.g. 5 rows
    # over 4 threads gives 3 groups). Pad with empty groups so that every
    # worker index 0..n_threads-1 has an entry.
    self.groups.extend([] for _ in range(n_threads - len(self.groups)))
    self.target_db_op = DBOperation(target_db_path)
    self.threads = []
    self.result_batches = [[] for _ in range(n_threads)]
def list2db(total_function: typing.List[str], db_path: str) -> None:
    """Write the given list of strings into a database.

    Every string is stripped of surrounding whitespace and stored as one
    ``Content`` row in the database at *db_path*, which is initialised
    first.

    Args:
        total_function: Strings to store.
        db_path: Path handed to ``DBOperation`` for the output database.
    """
    # The function returns normally, so it is annotated ``None``;
    # ``typing.NoReturn`` is reserved for callables that never return.
    results = [[line.strip()] for line in total_function]
    target_db_op = DBOperation(db_path)
    target_db_op.init_db()
    target_db_op.insert(['Content'], results)
    target_db_op.finalize()
def function_extraction(self, source_path, target_path, n_threads):
    """Run ``functionExtraction`` over the source database in batches of 10000.

    The target database is initialised first. The source rows are loaded
    once only to count them; each batch is then fetched with ``query_part``
    and handed to a fresh ``functionExtraction`` writing to *target_path*.
    """
    source_db_op = DBOperation(source_path)
    output_db = DBOperation(target_path)
    output_db.init_db()

    # Only the row count is needed here; release the rows straight away.
    all_rows = source_db_op.query_all(['Content'])
    case_count = len(all_rows)
    all_rows.clear()

    batch_size = 10000
    batch_num = math.ceil(case_count / batch_size)
    final_index = batch_num - 1
    for batch_index in range(batch_num):
        print("\n*****FuncExtra_Batch" + str(batch_index) + " start")
        # NOTE(review): the lower bound is offset by +1 for every batch except
        # the final one. Whether that is right depends on how query_part
        # interprets its bounds - confirm against DBOperation.
        if batch_index < final_index:
            lower = batch_index * batch_size + 1
            upper = (batch_index + 1) * batch_size
        else:
            lower = batch_index * batch_size
            upper = case_count
        batch_rows = source_db_op.query_part(['Content'], lower, upper)
        functionExtraction(batch_rows, target_path, n_threads).execute()
def readFunctions(self):
    """Return every ``Content`` row of the database named by ``hparams.corpus_db``."""
    corpus_db = DBOperation(self.hparams.corpus_db)
    return corpus_db.query_all(['Content'])
class generateQualify:
    """Syntax-check the corpus, store the cases that pass and log statistics."""

    def __init__(self):
        """Parse the command-line options and prepare the input rows and output database.

        Loads the corpus rows through ``readFunctions``, opens and
        initialises the database named by ``hparams.jshint_db``, and caps the
        number of worker threads at 32.
        """
        self.hparams = Hparams_generateQualify().parser.parse_args()
        self.functions = self.readFunctions()

        # Database that receives the cases that pass the syntax check.
        jshint_db = DBOperation(self.hparams.jshint_db)
        self.insert_db = jshint_db
        self.insert_db.init_db()

        # Never use more than 32 threads, whatever was requested.
        thread_cap = 32
        requested = self.hparams.n_threads
        self.n_threads = requested if requested <= thread_cap else thread_cap

    def readFunctions(self):
        """Return every ``Content`` row of the database named by ``hparams.corpus_db``."""
        corpus_db = DBOperation(self.hparams.corpus_db)
        return corpus_db.query_all(['Content'])

    def execute(self):
        """Run the three stages and log their results.

        Step 1 de-duplicates the corpus, runs ``syntaxCheck`` and stores the
        cases that pass jshint. Step 2 logs length and method/attribute call
        statistics. Step 3 logs API usage statistics.
        """
        log_path = os.path.join(self.hparams.result_dir,
                                f'generateQualify_{uuid4()}.log')
        logger_config(prefix='generateQualify', log_file=log_path)

        # Step 1: beautify the code and check its syntax with uglifyjs and
        # jshint (per the original author's note).
        logging.info("before remove duplicate, quantity is:{}".format(
            len(self.functions)))
        first_fields = [row[0] for row in self.functions]
        self.functions = list(set(first_fields))
        logging.info("after remove duplicate, quantity is:{}".format(
            len(self.functions)))

        checker = syntaxCheck(self.functions, self.n_threads)
        # The uglifyjs results are returned as well but are not used here.
        _uglifyjs_passed, jshint_passed = checker.execute()
        print("\n")
        logging.info("pass jshint, quantity is:{}".format(
            len(jshint_passed)))
        rows_to_store = [[case] for case in jshint_passed]
        self.insert_db.insert(["Content"], rows_to_store)
        self.insert_db.finalize()

        # Step 2: maximum, minimum and average character length, plus method
        # and attribute call statistics.
        for case in jshint_passed:
            if len(case) == 0:
                print(case)
        analyser = lengthAndCallable(jshint_passed)
        max_length, average_length, min_length = analyser.lengthAnalyse()
        average_method_callable, average_attribute_callable = analyser.callable()
        logging.info("max length of testcase is:{}".format(max_length))
        logging.info("average length of testcase is:{}".format(average_length))
        logging.info("min length of testcase is:{}".format(min_length))
        logging.info("every testcase callable methods:{} times".format(
            average_method_callable))
        logging.info("every testcase callable attributes:{} times".format(
            average_attribute_callable))

        # Step 3: API usage statistics.
        config_path = "./data/config.json"
        api_stats = getAPIs(config_path, jshint_passed, 8)
        (start_time, endtime, kind, totalCallable,
         callable_frequency) = api_stats.execute()
        logging.info("getAPI start from {}".format(start_time))
        logging.info("There are {} kinds of API callabled".format(kind))
        logging.info(
            "APIs were callabled total {} times".format(totalCallable))
        logging.info("the result of API callable frequency is {}".format(
            callable_frequency))
        logging.info("getAPI end in {}".format(endtime))