Beispiel #1
0
def get_dirfiles_into_list_luhn(file_dir, replace_dir):
    """Group the files found under ``file_dir`` by a truncated name.

    Each path collected through ``tools.get_filelist`` is keyed by the
    value of ``tools.get_name(path)`` with its first eight characters
    removed.

    Args:
        file_dir: Directory handed to ``tools.get_filelist``.
        replace_dir: Directory prefix used to rebuild every path. When it
            is the empty string, the collected path itself is stored.

    Returns:
        dict: With a non-empty ``replace_dir`` every key maps to a list of
        ``replace_dir + "/" + name + ".txt"`` strings. With
        ``replace_dir == ""`` every key maps to a single path string (the
        last path seen for that key), not to a list.
    """
    paths = []
    # The helper is expected to fill ``paths`` in place.
    tools.get_filelist(file_dir, paths)

    grouped = {}
    keep_original_path = replace_dir == ""
    for path in paths:
        base_name = tools.get_name(path)
        key = base_name[8:]
        bucket = grouped.setdefault(key, [])
        if keep_original_path:
            # NOTE(review): this replaces the list with a bare string, so
            # the value type differs between the two modes. Confirm that
            # callers rely on this before changing it.
            grouped[key] = path
            continue
        bucket.append(str(replace_dir + "/" + base_name + ".txt"))
    return grouped
Beispiel #2
0
def replace_words_by_num(whole_words, file_dir, save_dir):
    """Rewrite the "all" files under ``file_dir`` with words mapped to numbers.

    Every file accepted by the ``"all" in candidate`` filter is read with
    ``tools.read_lines``. Each line is split on single spaces; words that
    are keys of ``whole_words`` are replaced by ``str(whole_words[word])``
    and all other words are discarded. The converted text of each file is
    written to ``<save_dir>/<name>.txt`` where ``name`` is
    ``tools.get_name(path)``.

    Args:
        whole_words: Mapping from word to the value written in its place.
        file_dir: Directory handed to ``tools.get_filelist``.
        save_dir: Output directory. If it already exists it is removed
            with ``shutil.rmtree`` before anything is written.

    Returns:
        None.

    Notes:
        * A line that contains no known word does not produce an empty
          output line: the accumulated text is stripped after every line,
          which removes the newline left by the previous one.
        * ``save_dir`` is not recreated here after removal; presumably
          ``tools.write`` creates missing directories -- TODO confirm.
    """
    if os.path.lexists(save_dir):
        shutil.rmtree(save_dir)

    def _is_all_file(candidate):
        # Whether get_filelist passes a full path or a bare name is not
        # visible here; the test is a plain substring check either way.
        return "all" in candidate

    source_files = []
    tools.get_filelist(file_dir, source_files, _is_all_file)

    converted = {}
    for path in source_files:
        text = ""
        for line in tools.read_lines(path):
            numbers = "".join(
                str(whole_words[word]) + " "
                for word in line.split(" ")
                if word and word in whole_words
            )
            # Strip the whole buffer (not only this line) so the trailing
            # space goes away; this also collapses lines without matches.
            text = (text + numbers).strip() + "\n"
        converted[tools.get_name(path)] = text

    for name, text in converted.items():
        # os.path.join supplies the separator when save_dir has no trailing
        # slash and leaves the path unchanged when it already has one.
        tools.write(os.path.join(save_dir, name + ".txt"), text)
Beispiel #3
0
    def read_file(self, dir):
        """Load every file under ``dir`` and index its sentences.

        The paths collected by ``tools.get_filelist`` are processed in
        sorted order. Each file is read as UTF-8 and split with
        ``self.seperate_sentences``.

        Args:
            dir: Directory handed to ``tools.get_filelist``.

        Returns:
            tuple: ``(data, reverse_data)`` where ``data`` maps each file
            path to its sentences and ``reverse_data`` maps each sentence
            to the list of ``tools.get_name(path)`` values of the files it
            occurs in (one entry per occurrence, in processing order).
        """
        paths = []
        tools.get_filelist(dir, paths)
        paths.sort()

        data, reverse_data = {}, {}
        for path in paths:
            with open(path, mode="r", encoding="utf-8") as handle:
                sentences = self.seperate_sentences(handle.read())
                data[path] = sentences
                source_name = tools.get_name(path)
                for sentence in sentences:
                    reverse_data.setdefault(sentence, []).append(source_name)
        return data, reverse_data
Beispiel #4
0
def result_process(file_dir, save_dir):
    """Segment every file under ``file_dir`` with jieba and save the result.

    Each line returned by ``tools.read_lines`` is tokenised with
    ``jieba.cut`` and the tokens are joined with single spaces. The
    segmented lines of a file are written with ``tools.write_list`` to
    ``save_dir + "/" + name + ".txt"`` where ``name`` is
    ``tools.get_name(path)``.

    Args:
        file_dir: Directory handed to ``tools.get_filelist``.
        save_dir: Output directory. If it already exists it is removed
            with ``shutil.rmtree`` before anything is written.

    Returns:
        None.

    Notes:
        ``save_dir`` is not recreated here after removal; presumably
        ``tools.write_list`` creates missing directories -- TODO confirm.
    """
    if os.path.lexists(save_dir):
        shutil.rmtree(save_dir)

    source_files = []
    tools.get_filelist(file_dir, source_files)

    for path in source_files:
        lines = tools.read_lines(path)
        name = tools.get_name(path)
        segmented = [" ".join(jieba.cut(line)) for line in lines]
        # Write once per file, after all lines are segmented, instead of
        # rewriting the growing output after every single line.
        # An input without any lines produces no output file.
        if segmented:
            tools.write_list(save_dir + "/" + name + ".txt", segmented)