예제 #1
0
def main(argv):
    """Strip lines without readable text from every file under a folder.

    Parses ``argv`` with ``parse_args`` and takes the target folder from the
    ``"path"`` entry of the result. After an interactive y/n confirmation,
    every file yielded by ``get_files`` for every folder yielded by
    ``get_subfolders`` is rewritten in place, keeping only the lines that
    match the "readable" pattern (kanji, hiragana, katakana, Latin letters
    or one of the symbols 々 〆 〤).

    Files whose content cannot be read are reported through ``exception``
    and left untouched. Files that yield no lines are skipped as well.

    Args:
        argv: Command-line arguments forwarded to ``parse_args``.

    Returns:
        None. Returns early, without touching any file, when the user
        answers "n".
    """
    args = parse_args(argv)
    path = args["path"]

    prompt = (
        "WARNING: this will remove all non-readable lines in ALL files in folder & nested folders in {}\n Continue? (y/n)"
        .format(path))
    # Keep asking until the answer is exactly "y" or "n".
    ans = input(prompt)
    while ans not in ("y", "n"):
        ans = input(prompt)
    if ans == "n":
        print("CleanSubtitles canceled")
        return

    # Compiled once up front instead of being resolved again for every line.
    # NOTE(review): the alternative "[a-zA-Z]+" appears twice in this pattern;
    # presumably one of them was meant to cover a different character range
    # (e.g. full-width letters) - confirm before changing the pattern.
    readable = re.compile(
        "[一-龠]+|[ぁ-ゔ]+|[ァ-ヴー]+|[a-zA-Z]+|[a-zA-Z]+|[々〆〤]+")

    for subfolder in get_subfolders(path):
        for filepath in get_files(subfolder):
            print("doing {}".format(filepath))
            with open(filepath, 'r', encoding='utf-8') as file_r:
                try:
                    lines = file_r.readlines()
                except Exception as e:
                    # `exception` is defined elsewhere in the project
                    # (presumably a logging helper); the file is skipped.
                    exception(e)
                    continue

            # The read handle is closed at this point, so the file is never
            # truncated for writing while it is still open for reading.
            if not lines:
                continue

            with open(filepath, 'w', encoding='utf-8') as file_w:
                file_w.writelines(
                    line for line in lines if readable.search(line))
예제 #2
0
 def __init__(self, argv):
     """Keep the parsed arguments and create a ``JdsDatabase`` instance.

     Args:
         argv: Raw arguments, handed unchanged to ``parse_args``.
     """
     parsed_args = parse_args(argv)
     self.args = parsed_args
     self.db = JdsDatabase()
예제 #3
0
 def __init__(self, argv):
     """Set up the parsed arguments, the database and the character table.

     Args:
         argv: Raw arguments, handed unchanged to ``parse_args``.
     """
     self.args = parse_args(argv)
     database = JdsDatabase()
     self.db = database
     # Fetched once at construction time through the database object.
     self.chars = database.get_all_chars_with_count()
     # Starts out as None.
     self.total_count = None
예제 #4
0
import time

from python import settings
from python.DccUtils import parse_args
from python.JdsDatabase import JdsDatabase

# Script entry point: optionally start the profiler, fetch the raw kanji data
# from the database and begin writing the tab-separated kanji CSV file.
if __name__ == "__main__":
    print("{} started".format(__file__))
    # NOTE(review): start_time is not read in the visible part of the script;
    # presumably used further down to report the elapsed time - confirm.
    start_time = time.perf_counter()

    # Profiling is opt-in via settings; pr stays None when it is disabled.
    pr = None
    if settings.enable_profiler:
        pr = cProfile.Profile()
        pr.enable()

    # Only the arguments after the script name are passed to the parser.
    args = parse_args(sys.argv[1:])
    db = JdsDatabase()
    kanji_info_results = db.get_kanji_info_raw()
    kanji_count_results = db.get_kanji_count_raw()

    # newline='' leaves line-ending handling to the csv writer, as the csv
    # module documentation recommends for files passed to a writer.
    with open(settings.csv_path_kanji, mode='w', encoding='utf-8',
              newline='') as csv_file:
        # Column names, in the order they appear in the output file.
        fieldnames = [
            'kanji', 'count', 'freq', 'cumul_freq', 'drama_freq',
            'episode_freq', 'jdpt', 'jdpt_pos', 'jouyou', 'jouyou_pos'
        ]
        # Tab is used as the field separator instead of the default comma.
        writer = csv.DictWriter(csv_file,
                                fieldnames=fieldnames,
                                delimiter='\t')
        # NOTE(review): rows is not filled in the visible part of the script;
        # presumably populated and written further down - confirm.
        rows = {}
        writer.writeheader()