Esempio n. 1
0
def analysis(
    directory: str = './zip',
    aggregate=False,
    key_file='keys.csv',
    res_file='res.xlsx',
):
    """Group the entries of ``key_file`` and write the groups to ``res_file``.

    Args:
        directory: Handed to ``aggregate_files`` when ``aggregate`` is true.
        aggregate: When true, ``aggregate_files(directory, key_file)`` runs
            before grouping.
        key_file: File passed to ``aggregate_files`` and to ``group``.
        res_file: Output target passed to ``write_excel``.

    Returns:
        Always ``True``.
    """
    # Optional refresh step; skipped entirely when aggregate is falsy.
    if aggregate:
        aggregate_files(directory, key_file)

    # threshold=0.8 is forwarded to group() as-is; presumably a similarity
    # cut-off -- confirm against group()'s definition.
    write_excel(group(key_file, threshold=0.8), res_file)
    return True
Esempio n. 2
0
def analysis(directory: str = "./zip",
             aggregate=False,
             clusters_count=2,
             key_file="keys.csv",
             res_file="res.xlsx"):
    """Analyze all zip files and write the result to ``res_file``.

    Args:
        directory: Handed to ``aggregate_files`` when ``aggregate`` is true.
        aggregate: When true, ``aggregate_files(directory, key_file)`` runs
            before grouping.
        clusters_count: Forwarded to ``group`` as ``clusters_count``.
        key_file: Key file name; passed to ``aggregate_files`` and resolved
            through ``get_path`` before being grouped.
        res_file: Output target passed to ``write_excel``.

    Returns:
        Always ``True``.
    """
    if aggregate:
        aggregate_files(directory, key_file)
    # Resolve the caller-supplied key file instead of a hard-coded
    # "keys.csv", so grouping reads the same file aggregation was asked for.
    file_path = get_path(key_file)
    groups = group(file_path, clusters_count=clusters_count)
    write_excel(groups, res_file)
    return True
Esempio n. 3
0
def analysis(
    directory: str = './zip',
    aggregate=False,
    clusters_count=2,
    file_count: int = 10,
    calc_times: bool = False,
    key_file='keys.csv',
    res_file='res.xlsx',
):
    """Analyze all zip files and spread the clusters over several workbooks.

    The clusters returned by ``group`` are split into ``file_count``
    contiguous ranges of cluster ids and each range is written to
    ``res/<n>_<res_file>`` (``n`` starting at 1).

    Args:
        directory: Handed to ``aggregate_files`` when ``aggregate`` is true.
        aggregate: When true, ``aggregate_files(directory, key_file)`` runs
            before grouping.
        clusters_count: Forwarded to ``group``; also the number of cluster
            ids (``0 .. clusters_count - 1``) distributed over the files.
        file_count: Desired number of output files. Clamped to the range
            ``1 .. clusters_count`` so no file is empty by construction and
            the split never divides by zero.
        calc_times: Forwarded to ``group`` as ``calc_times``.
        key_file: Key file name; passed to ``aggregate_files`` and resolved
            through ``get_path`` before being grouped.
        res_file: Base name of the output files.

    Returns:
        Always ``True``.
    """
    if aggregate:
        aggregate_files(directory, key_file)
    # Resolve the caller-supplied key file instead of a hard-coded
    # 'keys.csv', so grouping reads the same file aggregation was asked for.
    file_path = get_path(key_file)
    groups = group(file_path, clusters_count=clusters_count, calc_times=calc_times)

    # Never more files than clusters, and never fewer than one file
    # (a zero file_count previously raised ZeroDivisionError).
    file_count = max(1, min(file_count, clusters_count))

    # Plain floor division dropped the trailing clusters whenever
    # clusters_count was not a multiple of file_count; hand the remainder
    # out one cluster at a time to the first `extra` files instead.
    each, extra = divmod(clusters_count, file_count)
    start = 0
    for i in range(file_count):
        stop = start + each + (1 if i < extra else 0)
        # NOTE(review): assumes `groups` is keyed by integer cluster id;
        # ids missing from `groups` map to None, as before.
        subgroup = {t: groups.get(t) for t in range(start, stop)}
        write_excel(subgroup, f'res/{i + 1}_{res_file}')
        start = stop
    return True
Esempio n. 4
0
 def __init__(
     self,
     directory: str = './zip',
     aggregate: bool = True,
     key_file: str = 'keys.csv',
     res_file: str = 'res.xmind',
     depth: int = 4,
     width: int = 5,
 ):
     """Load the key texts and build the index from them.

     Args:
         directory: Handed to ``aggregate_files`` when ``aggregate`` is true.
         aggregate: When true, the texts come from ``aggregate_files``;
             otherwise they are read from ``key_file`` with pandas.
         key_file: Key file name; stored on the instance and used as the
             source (or aggregation file name) of the texts.
         res_file: Stored on the instance as ``res_file``.
         depth: Stored on the instance as ``depth``.
         width: Stored on the instance as ``width``.
     """
     # Start empty; presumably filled in by _build_index below -- confirm
     # against its definition.
     self.index = {}
     self.invert_index = {}
     self.ignore_topics = {}
     self.key_file = key_file
     self.res_file = res_file
     self.depth = depth
     self.width = width
     if aggregate:
         # Pass the configured key file rather than a hard-coded
         # 'keys.csv' so both branches agree on which file is meant.
         texts = aggregate_files(directory,
                                 file_name=key_file,
                                 to_csv=False)
     else:
         texts = pd.read_csv(key_file, encoding='UTF_8_SIG')
     self._build_index(texts, get_stop_words())
Esempio n. 5
0
def analysis(
    directory: str = "./zip",
    aggregate=False,
    key_file="keys.csv",
    res_file="res.xlsx",
):
    """Run the grouping pipeline: optional aggregation, grouping, Excel export.

    Args:
        directory: Handed to ``aggregate_files`` when ``aggregate`` is true.
        aggregate: When true, ``aggregate_files(directory, key_file)`` runs
            before grouping.
        key_file: File passed to ``aggregate_files`` and to ``group``.
        res_file: Output target passed to ``write_excel``.

    Returns:
        Always ``True``.
    """
    if aggregate:
        # Refresh the key file before it is grouped.
        aggregate_files(directory, key_file)

    # The 0.8 threshold is passed straight through to group(); its exact
    # meaning is defined there.
    clustered = group(key_file, threshold=0.8)
    write_excel(clustered, res_file)

    return True