# A line is kept when this pattern matches anywhere in it; lines without a
# match are dropped from the file.
# NOTE(review): the `[a-zA-Z]+` alternative appears twice. It is harmless as
# written, but one copy may originally have been a different character range
# (e.g. full-width letters) -- confirm against the project history.
_READABLE_LINE_RE = re.compile(
    r"[一-龠]+|[ぁ-ゔ]+|[ァ-ヴー]+|[a-zA-Z]+|[a-zA-Z]+|[々〆〤]+")


def _confirm_cleanup(path):
    """Prompt until the user types exactly "y" or "n"; return True for "y"."""
    prompt = (
        "WARNING: this will remove all non-readable lines in ALL files in folder & nested folders in {}\n Continue? (y/n)"
        .format(path))
    while True:
        ans = input(prompt)
        if ans == "y":
            return True
        if ans == "n":
            return False


def _strip_unreadable_lines(filepath):
    """Rewrite *filepath* in place, keeping only lines matching the pattern.

    Files that cannot be opened or decoded as UTF-8 are reported through
    ``exception`` and left untouched. Files that read as empty are left
    untouched as well.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as file_r:
            lines = file_r.readlines()
    except Exception as e:
        # Deliberately broad: this is a best-effort batch job, so one bad
        # file is reported and skipped instead of aborting the whole run.
        exception(e)
        return

    if not lines:
        return

    # The read handle is closed by now, so the file is never open for
    # reading and writing at the same time.
    with open(filepath, 'w', encoding='utf-8') as file_w:
        file_w.writelines(
            line for line in lines if _READABLE_LINE_RE.search(line))


def main(argv):
    """Remove non-matching lines from every file under the given path.

    Args:
        argv: Command-line arguments, handed to ``parse_args``; the parsed
            result must provide a ``"path"`` entry.

    The user is asked for confirmation first. On "n" the function prints a
    cancellation message and returns without touching any file. Otherwise
    every file returned by ``get_files`` for every folder returned by
    ``get_subfolders`` is filtered in place.
    """
    args = parse_args(argv)
    path = args["path"]

    if not _confirm_cleanup(path):
        print("CleanSubtitles canceled")
        return

    for subfolder in get_subfolders(path):
        for filepath in get_files(subfolder):
            print("doing {}".format(filepath))
            _strip_unreadable_lines(filepath)
def __init__(self, argv):
    """Store the parsed arguments and create a database object.

    Args:
        argv: Command-line arguments, handed to ``parse_args``.

    Sets ``self.args`` to the value returned by ``parse_args`` and
    ``self.db`` to a new ``JdsDatabase`` instance.
    """
    parsed = parse_args(argv)
    self.args = parsed
    self.db = JdsDatabase()
def __init__(self, argv):
    """Store the parsed arguments, create a database object and load chars.

    Args:
        argv: Command-line arguments, handed to ``parse_args``.

    Sets ``self.args`` to the value returned by ``parse_args``, ``self.db``
    to a new ``JdsDatabase`` instance, and ``self.chars`` to whatever
    ``self.db.get_all_chars_with_count()`` returns. ``self.total_count`` is
    left as ``None`` here.
    """
    parsed = parse_args(argv)
    self.args = parsed
    self.db = JdsDatabase()
    char_counts = self.db.get_all_chars_with_count()
    self.chars = char_counts
    self.total_count = None
import time from python import settings from python.DccUtils import parse_args from python.JdsDatabase import JdsDatabase if __name__ == "__main__": print("{} started".format(__file__)) start_time = time.perf_counter() pr = None if settings.enable_profiler: pr = cProfile.Profile() pr.enable() args = parse_args(sys.argv[1:]) db = JdsDatabase() kanji_info_results = db.get_kanji_info_raw() kanji_count_results = db.get_kanji_count_raw() with open(settings.csv_path_kanji, mode='w', encoding='utf-8', newline='') as csv_file: fieldnames = [ 'kanji', 'count', 'freq', 'cumul_freq', 'drama_freq', 'episode_freq', 'jdpt', 'jdpt_pos', 'jouyou', 'jouyou_pos' ] writer = csv.DictWriter(csv_file, fieldnames=fieldnames, delimiter='\t') rows = {} writer.writeheader()