Beispiel #1
0
 def __init__(self):
     """Parse the command-line options, load the corpus and open the output DB.

     ``self.hparams`` must be assigned before ``readFunctions`` is called,
     because that method reads ``self.hparams.corpus_db``.
     """
     self.hparams = Hparams_generateQualify().parser.parse_args()
     self.functions = self.readFunctions()

     # Database named by the ``jshint_db`` option; initialised right away.
     self.insert_db = DBOperation(self.hparams.jshint_db)
     self.insert_db.init_db()

     # Never use more than 32 worker threads, whatever was requested.
     self.n_threads = min(self.hparams.n_threads, 32)
Beispiel #2
0
class functionExtraction:
    """Fan a batch of rows out to worker threads and store the unique results.

    ``contents`` is cut into ``n_threads`` consecutive groups. ``execute``
    starts one ``extractionThread`` per group, waits for all of them, removes
    exact duplicates and inserts what is left into the ``Content`` column of
    the target database.
    """

    def __init__(self, contents, target_db_path, n_threads):
        """Prepare the per-thread groups and the target database handle.

        Args:
            contents: Sliceable sequence of rows to process.
            target_db_path: Path handed to ``DBOperation`` for the output.
            n_threads: Number of worker threads to start (must be positive).
        """
        self.n_threads = n_threads
        self.contents = contents
        self.groups = self._split_into_groups(contents, n_threads)

        self.target_db_op = DBOperation(target_db_path)
        self.threads = []
        # One result list per worker, addressed by the worker's index.
        self.result_batches = [[] for _ in range(n_threads)]

    @staticmethod
    def _split_into_groups(contents, n_threads):
        """Return exactly ``n_threads`` consecutive slices of ``contents``."""
        # max(1, ...) keeps the slice step non-zero: with empty ``contents``
        # the computed size would be 0 and ``range`` rejects a zero step.
        group_size = max(1, int(math.ceil(len(contents) / n_threads)))
        groups = [
            contents[start:start + group_size]
            for start in range(0, len(contents), group_size)
        ]
        # Rounding up can yield fewer groups than threads (e.g. 9 items over
        # 4 threads gives 3 groups). Pad with empty groups so that every
        # worker index started by ``execute`` has a group of its own.
        groups.extend([] for _ in range(n_threads - len(groups)))
        return groups

    def execute(self):
        """Run all workers, de-duplicate their output and write it to the DB."""
        # NOTE(review): extractionThread presumably reads groups[i] and fills
        # result_batches[i]; its definition is not visible here -- confirm.
        started = []
        for i in range(self.n_threads):
            thread = extractionThread(i, self.groups, self.result_batches)
            self.threads.append(thread)
            started.append(thread)
            thread.start()

        # Join the threads started by *this* call. Indexing self.threads from
        # 0 would wait on stale threads if execute() were invoked twice.
        for thread in started:
            thread.join()

        results = []
        for batch in self.result_batches:
            results.extend(batch)

        # Use a set to filter out test cases that are exact duplicates.
        unique_callables = {r[0] for r in results}
        results = [[u] for u in unique_callables]
        self.target_db_op.insert(['Content'], results)
        self.target_db_op.finalize()
Beispiel #3
0
 def corpus_collection(self, corpus_path):
     """Collect every ``.js`` file under *corpus_path* into a dated database.

     The database is created inside *corpus_path* as ``corpus-YYYYMMDD.db``.
     Each non-empty file becomes one ``Content`` row; rows are written in
     batches once more than 10000 have accumulated, and the remainder is
     written at the end.

     Collection is best-effort: a failure while handling one file is
     reported on stderr and the walk continues with the next file.

     Args:
         corpus_path: Directory that is walked recursively.

     Returns:
         Path of the database file that was written.
     """
     time_stamped_db_name = os.path.join(
         corpus_path,
         'corpus-' + time.strftime("%Y%m%d", time.localtime()) + '.db')
     db_op = DBOperation(time_stamped_db_name)
     db_op.init_db()
     corpus = []
     total = 0
     for root, _dirs, files in os.walk(corpus_path):
         for file in files:
             if not file.lower().endswith('.js'):
                 continue
             file_path = os.path.join(root, file)
             try:
                 total += 1
                 sys.stdout.write("\rProcessing: %d" % total)
                 # Read as bytes and drop undecodable sequences so that a
                 # badly encoded file still contributes its readable part.
                 with open(file_path, 'rb') as f:
                     content = f.read().decode('utf-8', 'ignore').strip()
                 if content:
                     corpus.append([content])
                 if len(corpus) > 10000:
                     db_op.insert(['Content'], corpus)
                     corpus.clear()
             except Exception as err:
                 # Stay best-effort, but say what went wrong instead of
                 # silently discarding the error.
                 sys.stderr.write("\nError while processing %s: %r\n" %
                                  (file_path, err))
     db_op.insert(['Content'], corpus)
     db_op.finalize()
     return time_stamped_db_name
Beispiel #4
0
def db2list(db_path: str) -> typing.List[str]:
    """Read the data stored in a single database file.

    Queries the ``Content`` column of the database at *db_path*, finalizes
    the handle, and returns the first field of every row with surrounding
    whitespace stripped.
    """
    reader = DBOperation(db_path)
    rows = reader.query_all(['Content'])
    reader.finalize()
    return [row[0].strip() for row in rows]
Beispiel #5
0
    def __init__(self, contents, target_db_path, n_threads):
        """Prepare the per-thread groups and the target database handle.

        Args:
            contents: Sliceable sequence of rows to process.
            target_db_path: Path handed to ``DBOperation`` for the output.
            n_threads: Number of worker threads (must be positive).
        """
        self.n_threads = n_threads

        self.contents = contents

        # max(1, ...) keeps the slice step non-zero: with empty ``contents``
        # the computed size would be 0 and ``range`` rejects a zero step.
        group_size = max(1, int(math.ceil(len(contents) / n_threads)))
        self.groups = [
            self.contents[start:start + group_size]
            for start in range(0, len(self.contents), group_size)
        ]
        # Rounding up can yield fewer groups than threads (e.g. 9 items over
        # 4 threads gives 3 groups). Pad with empty groups so that there is
        # one group per thread index.
        self.groups.extend([] for _ in range(n_threads - len(self.groups)))

        self.target_db_op = DBOperation(target_db_path)
        self.threads = []
        # One result list per worker, addressed by the worker's index.
        self.result_batches = [[] for _ in range(self.n_threads)]
Beispiel #6
0
def list2db(total_function: typing.List[str], db_path: str) -> None:
    """Write the given list of strings into the database at *db_path*.

    Every entry is stripped of surrounding whitespace and stored as one
    single-column ``Content`` row. The database is initialised before the
    insert and finalized afterwards.

    Args:
        total_function: Strings to store, one row each.
        db_path: Path handed to ``DBOperation``.
    """
    # The function returns normally, so it is annotated ``None``;
    # ``typing.NoReturn`` is reserved for callables that never return.
    rows = [[line.strip()] for line in total_function]
    target_db_op = DBOperation(db_path)
    target_db_op.init_db()
    target_db_op.insert(['Content'], rows)
    target_db_op.finalize()
Beispiel #7
0
 def function_extraction(self, source_path, target_path, n_threads):
     """Run function extraction over the source database, 10000 rows a batch.

     The target database is initialised first. The source ``Content`` rows
     are then fetched batch by batch with ``query_part`` and every batch is
     handed to a ``functionExtraction`` instance that writes to
     *target_path* using *n_threads* workers.
     """
     batch_size = 10000
     source_db_op = DBOperation(source_path)
     # NOTE(review): neither handle is finalized in this method -- confirm
     # whether DBOperation needs an explicit finalize() here.
     target_db_op = DBOperation(target_path)
     target_db_op.init_db()

     # The full query is only used to learn the row count; the rows
     # themselves are dropped again immediately.
     all_rows = source_db_op.query_all(['Content'])
     caseNum = len(all_rows)
     all_rows.clear()

     batch_num = math.ceil(caseNum / batch_size)
     last_batch = batch_num - 1
     for x in range(batch_num):
         print("\n*****FuncExtra_Batch" + str(x) + " start")
         # NOTE(review): the final batch starts at x * batch_size while all
         # other batches start at x * batch_size + 1. Whether that overlaps
         # the previous batch depends on query_part's bounds -- confirm.
         if x == last_batch:
             lower, upper = x * batch_size, caseNum
         else:
             lower, upper = x * batch_size + 1, (x + 1) * batch_size
         contents = source_db_op.query_part(['Content'], lower, upper)
         functionExtraction(contents, target_path, n_threads).execute()
Beispiel #8
0
 def readFunctions(self):
     """Return all ``Content`` rows of the database at ``self.hparams.corpus_db``."""
     corpus_db = DBOperation(self.hparams.corpus_db)
     return corpus_db.query_all(['Content'])
Beispiel #9
0
class generateQualify:
    """Check the corpus for syntax errors and log statistics about it.

    The corpus is read from the database named by the ``corpus_db`` option;
    the entries returned by the jshint step are written to the database
    named by the ``jshint_db`` option.
    """

    def __init__(self):
        """Parse the command-line options, load the corpus and open the output DB."""
        # hparams must exist before readFunctions() runs, since it reads
        # self.hparams.corpus_db.
        self.hparams = Hparams_generateQualify().parser.parse_args()
        self.functions = self.readFunctions()

        self.insert_db = DBOperation(self.hparams.jshint_db)
        self.insert_db.init_db()

        # Never use more than 32 worker threads, whatever was requested.
        self.n_threads = min(self.hparams.n_threads, 32)

    def readFunctions(self):
        """Return all ``Content`` rows of the corpus database."""
        corpus_db = DBOperation(self.hparams.corpus_db)
        return corpus_db.query_all(['Content'])

    def execute(self):
        """Run the three analysis steps and log their results.

        Step 1 uses uglifyjs and jshint for code beautification and syntax
        checking, step 2 gathers length and method/attribute call statistics,
        and step 3 gathers API usage statistics.
        """
        log_file = os.path.join(self.hparams.result_dir,
                                f'generateQualify_{uuid4()}.log')
        logger_config(prefix='generateQualify', log_file=log_file)

        # Drop exact duplicates; each row carries its text in column 0.
        logging.info("before remove duplicate, quantity is:{}".format(
            len(self.functions)))
        contents = [row[0] for row in self.functions]
        self.functions = list(set(contents))
        logging.info("after remove duplicate, quantity is:{}".format(
            len(self.functions)))

        # step1: code beautification and syntax checking (uglifyjs + jshint)
        checker = syntaxCheck(self.functions, self.n_threads)
        step1_uglifyjs_results, step1_jshint_results = checker.execute()
        print("\n")
        logging.info("pass jshint, quantity is:{}".format(
            len(step1_jshint_results)))
        self.insert_db.insert(["Content"],
                              [[passed] for passed in step1_jshint_results])
        self.insert_db.finalize()

        # step2: max, min and average character length, plus how often
        # methods and attributes are called
        for empty in (r for r in step1_jshint_results if len(r) == 0):
            print(empty)
        analyser = lengthAndCallable(step1_jshint_results)
        max_length, average_length, min_length = analyser.lengthAnalyse()
        average_method_callable, average_attribute_callable = (
            analyser.callable())
        logging.info("max length of testcase is:{}".format(max_length))
        logging.info("average length of testcase is:{}".format(average_length))
        logging.info("min length of testcase is:{}".format(min_length))
        logging.info("every testcase callable methods:{} times".format(
            average_method_callable))
        logging.info("every testcase callable attributes:{} times".format(
            average_attribute_callable))

        # step3: API usage statistics
        config_path = "./data/config.json"
        api_counter = getAPIs(config_path, step1_jshint_results, 8)
        (start_time, endtime, kind, totalCallable,
         callable_frequency) = api_counter.execute()
        logging.info("getAPI start from {}".format(start_time))
        logging.info("There are {} kinds of API callabled".format(kind))
        logging.info(
            "APIs were callabled total {} times".format(totalCallable))
        logging.info("the result of API callable frequency is {}".format(
            callable_frequency))
        logging.info("getAPI end in {}".format(endtime))