Example 1
File: api.py  Project: WuYff/Bugine
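# Imports needed by this snippet. The standard-library modules are certain from the
# calls below; nlp_util, work_path, util, match_name and _scan_match are Bugine
# project modules that are assumed to be importable alongside api.py.
import logging
import os
import pprint as pp
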
def descript(query_decp, source_category, except_files=None, extend=False, pool_size=32):
    """
    生成描述文件
    ~1分钟得出结果
    :param query_decp: 描述文件矩阵
    example line: xml_file_name, class_name, element_name
    :param except_files: 排除文件关键词,接受字符串或字符串数组
    :param pool_size: 并行池大小
    :return: a tuple. 得到src app与 数据库每个app的总相似度,按照相似度降序排列. 用作 搜索 app
    """
    query_decp = nlp_util.process_xsv(query_decp)
    if extend:
        src_dir = work_path.in_project('./model/data/description_extend_all')
    else:
        src_dir = work_path.in_project('./model/data/description')
    print("PATH!!!! {}".format(src_dir))
    logger = logging.getLogger("StreamLogger")
    file_list = os.listdir(src_dir)
    file_list = [os.path.join(src_dir, f) for f in file_list]

    if except_files is not None:
        tmp = []
        rms = []
        if isinstance(except_files, str):
            for i in file_list:
                if except_files not in i:
                    tmp.append(i)
                else:
                    rms.append(i)
        elif isinstance(except_files, (list, set)):
            except_files = set(except_files)
            for i in file_list:
                flag = False
                for j in except_files:
                    if j in i:
                        flag = True
                        break
                if not flag:
                    tmp.append(i)
                else:
                    rms.append(i)
        logger.debug(pp.pformat(rms))
        # Keep only the files that were not excluded by the keyword filter.
        file_list = tmp
    logger.debug(pp.pformat(file_list))

    scan_output = _scan_match(source_category, query_decp, file_list, match_name.ngram_compare, [1, 0.5, 0.5],
                              threshold=0.7,
                              pool_size=pool_size)
    # Overall similarity between the source app and every app in the database,
    # sorted in descending order of similarity. Each entry is a tuple:
    #   str   "reference app description file name",
    #   float "app-level similarity",
    #   list  "component similarities of the reference app":
    #         [(query app component, reference app component, component similarity)]
    logger.debug(pp.pformat(util.get_col(scan_output, [0, 1])))
    return scan_output
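
For orientation, the call below is a minimal usage sketch rather than code from the project: the
query file name "my_query.csv" and the category label "tool" are invented placeholders, and the
result is iterated according to the (file name, similarity, detail) tuples described above.

# Hypothetical usage sketch; "my_query.csv" and "tool" are made-up values.
query_rows = util.read_csv("my_query.csv")   # rows: xml_file_name, class_name, element_name
matches = descript(query_rows, "tool", except_files=["my_query"], pool_size=8)
for ref_file, similarity, _detail in matches[:10]:
    print(ref_file, similarity)
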
Example 2
def _single_scan_helper(arg):
    # Compare the query UI list against one reference description file and return
    # (file_path, app-level similarity score, per-component score distribution).
    index, file_path, sample_ui_list, comp_func, weight_list, threshold = arg
    logger = logging.getLogger("StreamLogger")
    logger.debug(file_path)
    tmp_out = util.read_csv(file_path)
    tmp_out = nlp_util.process_xsv(tmp_out)

    if len(tmp_out) == 0:
        logger.debug(f"EMPTY {file_path}")
        score_distribution_list = []
    else:
        score_distribution_list = match_name.weight_compare_list(sample_ui_list, tmp_out, comp_func,
                                                                 weight_list)
    # score_distribution_list = util.get_col(score_distribution_list, 2)
    score = match_name.similar_index(score_distribution_list, threshold, col_index=2, rate=True)

    rt = (file_path, score, score_distribution_list)
    logger.debug(f"ADD {index} {file_path}")
    return rt
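
_scan_match itself is not part of this listing. Given the pool_size parameter and the argument
tuple unpacked in _single_scan_helper, the sketch below shows one plausible way the scan could be
fanned out over a process pool; the use of multiprocessing.Pool, the descending sort, and the
unused source_category argument are assumptions, not the project's confirmed implementation.

from multiprocessing import Pool

def _scan_match_sketch(source_category, sample_ui_list, file_list, comp_func, weight_list,
                       threshold=0.7, pool_size=32):
    # One argument tuple per reference description file, in the same order that
    # _single_scan_helper unpacks them. source_category's role is not visible in
    # this listing, so it is ignored here.
    args = [(i, f, sample_ui_list, comp_func, weight_list, threshold)
            for i, f in enumerate(file_list)]
    with Pool(pool_size) as pool:
        results = pool.map(_single_scan_helper, args)
    # Sort by the app-level similarity score, highest first, to match the
    # "descending order of similarity" described in descript's docstring.
    return sorted(results, key=lambda r: r[1], reverse=True)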