Esempio n. 1
0
    def read_chars(self):
        """
        Count the characters of every drama and push the results to the DB.

        Dramas whose ``kanji_ok`` flag equals 1 are skipped. All other dramas
        are handed to ``read_chars_worker`` on a thread pool, in batches, and
        every worker result is pushed with ``db.push_chars_count``. Once all
        batches are done ``db.push_chars`` is called a single time.
        :return:
        """
        batch_size = 11  # the original kept submitting until len(futures) > 10

        pending = []
        for drama in self.db.get_all_dramas():
            # Compare by value: 'is' tests object identity, which only happens
            # to work for small ints on CPython.
            if drama.kanji_ok == 1:
                print("kanji_ok TRUE -> {} skipped".format(drama.uid))
                continue
            pending.append(drama)

        with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
            # Slice the list into batches instead of removing items from it
            # while iterating, which silently skips elements.
            for start in range(0, len(pending), batch_size):
                batch = pending[start:start + batch_size]
                futures = [
                    executor.submit(self.read_chars_worker, drama)
                    for drama in batch
                ]
                for future in concurrent.futures.as_completed(futures):
                    try:
                        chars = future.result()
                        self.db.push_chars_count(chars)
                    except Error as e:
                        # A failure for one drama must not discard the results
                        # of the other dramas in the same batch.
                        exception(e)
        self.db.push_chars()
Esempio n. 2
0
    def __cursor_execute_thread_safe(self, sql):
        """
        Execute and commit one SQL statement while holding the class-wide lock.

        If the first attempt raises ``Error``, ``reconnect(3, 3)`` is called on
        the connection and, when the state check passes, the statement is
        executed and committed once more. When ``settings.print_sql`` is set
        the statement and its run time are printed.
        :param sql: SQL statement to execute
        :return: True if the statement was executed and committed, else False
        """
        with JdsDatabase.__lock:
            if settings.print_sql:
                print(sql)
            # Always bound, so the timing print below cannot hit an unbound
            # name if settings.print_sql is switched on in between.
            cur_start_time = time.perf_counter()
            try:
                self.__cursor.execute(sql)
                self.__db.commit()
            except Error:
                # try again
                try:
                    self.__db.reconnect(3, 3)
                    if not self.__check_state():
                        print("Could not reconnect to server")
                        # The statement was never executed: report failure
                        # instead of falling through to 'return True'.
                        return False
                    self.__cursor.execute(sql)
                    self.__db.commit()
                except Error as e:
                    exception(e)
                    return False

            if settings.print_sql:
                print("in {:2.2f}".format(time.perf_counter() -
                                          cur_start_time))
        return True
Esempio n. 3
0
    def line_ref_worker(self, drama):
        """
        threaded worker that builds references of characters with lines.
        requires drama,lines to be in the DB beforehand
        :param drama: drama whose lines are fetched with db.get_lines_for_drama
        :return: dict mapping each character (newline excluded) to a JdsChar
                 that received at most the first 10 line uids it occurs in
        """
        lines = {}  # key = char, value = [] of line_uid
        jds_lines = self.db.get_lines_for_drama(drama)
        print("start line_ref_worker for {}".format(drama.value))
        cur_start_time = time.perf_counter()
        for jds_line in jds_lines:
            for char in jds_line.value:
                try:
                    lines.setdefault(char, []).append(jds_line.uid)
                except Exception as e:
                    exception(e)

        # The dict is keyed by plain str characters, so the newline has to be
        # removed with the str key. It is removed before the result is built;
        # doing it afterwards leaves the newline in the returned dict.
        if "\n" in lines:
            del lines["\n"]
            print("Deleted \\n")

        jds_chars = {}
        for char, line_uids in lines.items():
            new_char = JdsChar.from_drama(char, drama.uid)
            new_char.add_line_refs(line_uids[:10])
            jds_chars[char] = new_char
        run_time = time.perf_counter() - cur_start_time
        print("stop line_ref_worker for {} with {} chars in {}".format(
            drama.value, len(lines), run_time))
        return jds_chars
Esempio n. 4
0
 def get_count_for_drama(self, drama):
     """
     Read the rows of the count table that belong to one drama.
     :param drama: object whose ``uid`` is inserted into the query
     :return: dict mapping each row's 'kanji_uid' to its 'count', or None
              when the state check fails
     """
     if JdsDatabase.__check_state():
         query = "SELECT * FROM count WHERE drama_uid={} ".format(drama.uid)
         rows = self.__cursor_execute_fetchall_thread_safe(query)
         counts = {}
         try:
             for row in rows:
                 counts[row['kanji_uid']] = row['count']
         except Exception as e:
             # entries collected before the failure are still returned
             exception(e)
         return counts
     return None
Esempio n. 5
0
 def get_all_chars(self):
     """
     Read every row of the kanji table.
     :return: dict mapping each row's 'kanji_uid' to a JdsChar built from
              chr(kanji_uid), or None when the state check fails
     """
     if JdsDatabase.__check_state():
         rows = self.__cursor_execute_fetchall_thread_safe(
             "SELECT * FROM kanji ")
         by_uid = {}
         try:
             for row in rows:
                 uid = row['kanji_uid']
                 by_uid[uid] = JdsChar(chr(uid))
         except Exception as e:
             # entries collected before the failure are still returned
             exception(e)
         return by_uid
     return None
Esempio n. 6
0
 def get_lines_for_drama(self, drama):
     """
     Read the rows of the line table that belong to one drama.
     :param drama: object whose ``uid`` is inserted into the query
     :return: list with one JdsLine per row, or None when the state check
              fails
     """
     if not JdsDatabase.__check_state():
         return None
     rows = self.__cursor_execute_fetchall_thread_safe(
         "SELECT * FROM line WHERE drama_uid={}".format(drama.uid))
     collected = []
     try:
         for row in rows:
             jds_line = JdsLine(row['line_uid'], row['drama_uid'],
                                row['value'], row['episode_uid'])
             collected.append(jds_line)
     except Exception as e:
         # lines collected before the failure are still returned
         exception(e)
     return collected
Esempio n. 7
0
 def __cursor_execute_fetchone_thread_safe(self, sql):
     """
     Execute one SQL statement while holding the class-wide lock and fetch
     a single row from the cursor.

     If the first attempt raises ``Error``, ``reconnect(3, 5)`` is called on
     the connection and, when the state check passes, the statement is
     executed once more. When ``settings.print_sql`` is set the statement
     and its run time are printed.
     :param sql: SQL statement to execute
     :return: result of the cursor's fetchone(), or None when the statement
              could not be executed
     """
     with JdsDatabase.__lock:
         if settings.print_sql:
             print(sql)
         # Always bound, so the timing print below cannot hit an unbound
         # name if settings.print_sql is switched on in between.
         cur_start_time = time.perf_counter()
         try:
             self.__cursor.execute(sql)
         except Error:
             # try again: reconnecting alone leaves the cursor without a
             # result, so the statement has to be executed a second time
             try:
                 self.__db.reconnect(3, 5)
                 if not self.__check_state():
                     print("Could not reconnect to server")
                     return None
                 self.__cursor.execute(sql)
             except Error as e:
                 exception(e)
                 # nothing was executed, so there is nothing to fetch
                 return None
         if settings.print_sql:
             print("in {:2.2f}".format(time.perf_counter() -
                                       cur_start_time))
         # fetch from the same cursor the statement was executed on
         return self.__cursor.fetchone()
Esempio n. 8
0
    def get_all_lines_by_drama(self):
        """
        Read every row of the line table and group the rows by drama.
        :return: dict mapping each 'drama_uid' to the list of JdsLine objects
                 built from its rows, or None when the state check fails
        """
        if JdsDatabase.__check_state():
            rows = self.__cursor_execute_fetchall_thread_safe(
                "SELECT * FROM line ")
            grouped = {}
            try:
                for row in rows:
                    # the bucket is created first, then the line is built
                    bucket = grouped.setdefault(row['drama_uid'], [])
                    bucket.append(
                        JdsLine(row['line_uid'], row['drama_uid'],
                                row['value'], row['episode_uid']))
            except Exception as e:
                # groups collected before the failure are still returned
                exception(e)
            return grouped
        return None
Esempio n. 9
0
 def get_all_chars_with_count(self):
     """
     Read the kanji rows joined with their count rows for drama_uid 0.
     :return: dict mapping each row's 'kanji_uid' to a JdsChar built from
              chr(kanji_uid) with its count set, or None when the state
              check fails
     """
     if not JdsDatabase.__check_state():
         return None
     query = """ SELECT a.value, a.kanji_uid, b.count
                 FROM kanji a
                 INNER JOIN count b
                 ON a.kanji_uid = b.kanji_uid
                 WHERE b.drama_uid = 0
             """
     rows = self.__cursor_execute_fetchall_thread_safe(query)
     by_uid = {}
     try:
         for row in rows:
             uid = row['kanji_uid']
             jds_char = JdsChar(chr(uid))
             jds_char.set_count(row['count'])
             by_uid[uid] = jds_char
     except Exception as e:
         # entries collected before the failure are still returned
         exception(e)
     return by_uid
Esempio n. 10
0
    def read_chars_worker(self, drama):
        """
        threaded worker that counts all characters for a given drama, by getting all lines from the DB and counting the char.
        requires drama,lines to be in the DB beforehand
        :param drama: drama whose lines are fetched with db.get_lines_for_drama
        :return: dict mapping each character (newline excluded) to a JdsChar
                 carrying its occurrence count and the number of distinct
                 episodes it occurs in
        """
        chars = {}  # key = char, value = count
        episodes = {}  # key = char, value = set of episode_uid

        print("start read_chars_worker for {}".format(drama.value))
        jds_lines = self.db.get_lines_for_drama(drama)
        cur_start_time = time.perf_counter()
        for jds_line in jds_lines:
            try:
                for char in jds_line.value:
                    if char not in chars:
                        chars[char] = 0
                        episodes[char] = set()
                    chars[char] += 1
                    # set.add already ignores duplicates
                    episodes[char].add(jds_line.episode_uid)
            except Exception as e:
                exception(e)

        # The dicts are keyed by plain str characters, so the newline has to
        # be removed with the str key. It is removed before the result is
        # built; doing it afterwards leaves the newline in the returned dict.
        if "\n" in chars:
            del chars["\n"]
            episodes.pop("\n", None)
            print("Deleted \\n")

        jds_chars = {}
        for char, count in chars.items():
            new_char = JdsChar.from_drama(char, drama.uid)
            new_char.set_count(count)
            new_char.episode_count = len(episodes[char])
            jds_chars[char] = new_char
        run_time = time.perf_counter() - cur_start_time
        print("stop read_chars_worker for {} with {} chars in {:2.2f}".format(
            drama.value, len(chars), run_time))
        return jds_chars
Esempio n. 11
0
    def read_lines(self):
        """
        Run ``line_ref_worker`` for every subfolder of ``self.args["path"]``
        on a thread pool, in batches, number the items of each worker result
        with a running uid and push the result with ``db.push_lines``.

        An ``Error`` raised anywhere in the processing is logged and stops
        the remaining batches.
        :return:
        """
        batch_size = 16  # the original kept submitting until len(futures) > 15
        line_id = 0
        subfolders = list(DccUtils.get_subfolders(self.args["path"]))
        with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
            try:
                # Slice the list into batches instead of removing items from
                # it while iterating, which silently skips elements.
                for start in range(0, len(subfolders), batch_size):
                    batch = subfolders[start:start + batch_size]
                    futures = [
                        executor.submit(self.line_ref_worker, subfolder)
                        for subfolder in batch
                    ]
                    for future in concurrent.futures.as_completed(futures):
                        # NOTE(review): this loop expects the worker to return
                        # an iterable of objects with a writable ``uid``. If
                        # line_ref_worker returns a dict keyed by character,
                        # iterating it yields str keys and the assignment
                        # below fails - confirm the intended worker.
                        lines = future.result()
                        for line in lines:
                            line.uid = line_id
                            line_id += 1
                        self.db.push_lines(lines)
            except Error as e:
                exception(e)
Esempio n. 12
0
def main(argv):
    """
    Remove every line without readable text from all files below a folder.

    After the user confirms with "y", each file returned by ``get_files`` for
    each subfolder of ``args["path"]`` is rewritten in place, keeping only the
    lines that match the readable-text pattern (the ranges 一-龠, ぁ-ゔ,
    ァ-ヴー, a-zA-Z and the marks 々〆〤). Files that cannot be read or that
    are empty are left untouched.
    :param argv: command line arguments, handed to parse_args
    :return:
    """
    args = parse_args(argv)
    path = args["path"]

    prompt = (
        "WARNING: this will remove all non-readable lines in ALL files in folder & nested folders in {}\n Continue? (y/n)"
        .format(path))
    ans = input(prompt)
    while ans != "y" and ans != "n":
        ans = input(prompt)
    if ans == "n":
        print("CleanSubtitles canceled")
        return

    # Compiled once instead of being looked up again for every line.
    readable = re.compile(
        "[一-龠]+|[ぁ-ゔ]+|[ァ-ヴー]+|[a-zA-Z]+|[a-zA-Z]+|[々〆〤]+")

    for subfolder in get_subfolders(path):
        for filepath in get_files(subfolder):
            print("doing {}".format(filepath))
            with open(filepath, 'r', encoding='utf-8') as file_r:
                try:
                    lines = file_r.readlines()
                except Exception as e:
                    exception(e)
                    continue

            # The read handle is closed before the same file is reopened for
            # writing. Empty files are not rewritten.
            if lines:
                with open(filepath, 'w', encoding='utf-8') as file_w:
                    file_w.writelines(
                        line for line in lines if readable.search(line))