Example #1
0
class JdsCharHandler:
    """Count the characters used in each drama and push the totals to the DB.

    The handler reads the lines of every drama from ``JdsDatabase``, counts
    how often each character occurs (and in how many distinct episodes) and
    hands the result back to the database layer.
    """

    # Maximum number of dramas submitted to the thread pool before their
    # results are collected and pushed; bounds how many result dicts are
    # held in memory at once.
    BATCH_SIZE = 11

    def __init__(self, argv):
        """
        :param argv: command line arguments, forwarded to ``parse_args``
        """
        self.args = parse_args(argv)
        self.db = JdsDatabase()

    def reset(self):
        """
        Reset the character data by delegating to ``JdsDatabase.reset_chars``.

        :return: whatever ``reset_chars`` returns
        """
        return self.db.reset_chars()

    def read_chars_worker(self, drama):
        """
        Threaded worker that counts all characters of a given drama by
        fetching its lines from the DB and counting every character.
        Requires the drama and its lines to be in the DB beforehand.

        The newline character is removed from the result.

        :param drama: drama to process; ``drama.value`` is used in the log
            output and ``drama.uid`` is passed to ``JdsChar.from_drama``
        :return: dict mapping each character to a ``JdsChar`` that carries
            its occurrence count and the number of distinct episodes it
            appears in
        """
        counts = {}  # key = char, value = number of occurrences
        episodes = {}  # key = char, value = set of episode uids

        print("start read_chars_worker for {}".format(drama.value))
        jds_lines = self.db.get_lines_for_drama(drama)
        cur_start_time = time.perf_counter()
        for jds_line in jds_lines:
            try:
                for char in jds_line.value:
                    counts[char] = counts.get(char, 0) + 1
                    # set.add is already idempotent, no membership test needed
                    episodes.setdefault(char, set()).add(
                        jds_line.episode_uid)
            except Exception as e:
                # Best effort: report the broken line and keep counting.
                exception(e)

        # Drop the newline *before* building the result so that it neither
        # shows up in the returned dict nor in the reported character count.
        # The tallies are keyed by plain strings, so the key is "\n" itself.
        if "\n" in counts:
            del counts["\n"]
            episodes.pop("\n", None)
            print("Deleted \\n")

        jds_chars = {}
        for char, count in counts.items():
            new_char = JdsChar.from_drama(char, drama.uid)
            new_char.set_count(count)
            new_char.episode_count = len(episodes[char])
            jds_chars[char] = new_char

        run_time = time.perf_counter() - cur_start_time
        print("stop read_chars_worker for {} with {} chars in {:2.2f}".format(
            drama.value, len(counts), run_time))
        return jds_chars

    def read_chars(self):
        """
        Count the characters of every drama that is not yet flagged with
        ``kanji_ok == 1`` and push the results to the DB.

        Dramas are processed by a pool of 4 threads in batches of
        ``BATCH_SIZE``; the counts of each finished worker are pushed with
        ``push_chars_count`` and ``push_chars`` is called once at the end.
        An ``Error`` raised while handling a batch is reported through
        ``exception`` and processing continues with the next batch.
        """
        # Build the work list up front instead of removing items from the
        # list while iterating over it (which skips elements), and leave the
        # list returned by the DB layer untouched.
        pending = []
        for drama in self.db.get_all_dramas():
            # Value comparison: identity (`is`) on an int literal is
            # implementation-dependent.
            if drama.kanji_ok == 1:
                print("kanji_ok TRUE -> {} skipped".format(drama.uid))
            else:
                pending.append(drama)

        with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
            for start in range(0, len(pending), self.BATCH_SIZE):
                batch = pending[start:start + self.BATCH_SIZE]
                try:
                    futures = [
                        executor.submit(self.read_chars_worker, drama)
                        for drama in batch
                    ]
                    for future in concurrent.futures.as_completed(futures):
                        self.db.push_chars_count(future.result())
                except Error as e:
                    exception(e)
        self.db.push_chars()

    def create_tables(self):
        """Create the character tables via ``JdsDatabase.create_char_tables``."""
        self.db.create_char_tables()
class JdsLineRefHandler:
    """Build references from characters to the lines they occur in.

    The handler reads the lines of every drama from ``JdsDatabase``, records
    for each character a handful of line uids containing it and hands the
    result back to the database layer.
    """

    # Maximum number of dramas submitted to the thread pool before their
    # results are collected and pushed.
    BATCH_SIZE = 16

    # Number of line references kept per character and drama.
    MAX_LINE_REFS = 10

    def __init__(self, argv):
        """
        :param argv: command line arguments, forwarded to ``parse_args``
        """
        self.args = parse_args(argv)
        self.db = JdsDatabase()

    def reset(self):
        """
        Reset the line references by delegating to
        ``JdsDatabase.reset_line_refs``.

        :return: whatever ``reset_line_refs`` returns
        """
        return self.db.reset_line_refs()

    def line_ref_worker(self, drama):
        """
        Threaded worker that builds references of characters to lines.
        Requires the drama and its lines to be in the DB beforehand.

        For every character only the first ``MAX_LINE_REFS`` line uids (in
        the order the lines are returned by the DB) are kept. The newline
        character is removed from the result.

        :param drama: drama to process; ``drama.value`` is used in the log
            output and ``drama.uid`` is passed to ``JdsChar.from_drama``
        :return: dict mapping each character to a ``JdsChar`` holding its
            line references
        """
        lines = {}  # key = char, value = [] of line_uid (capped)
        jds_lines = self.db.get_lines_for_drama(drama)
        print("start line_ref_worker for {}".format(drama.value))
        cur_start_time = time.perf_counter()
        for jds_line in jds_lines:
            for char in jds_line.value:
                try:
                    refs = lines.setdefault(char, [])
                    # Only the first MAX_LINE_REFS uids are ever used, so
                    # there is no point in collecting more than that.
                    if len(refs) < self.MAX_LINE_REFS:
                        refs.append(jds_line.uid)
                except Exception as e:
                    # Best effort: report the problem and keep going.
                    exception(e)

        # Drop the newline *before* building the result so that it neither
        # shows up in the returned dict nor in the reported character count.
        # The dict is keyed by plain strings, so the key is "\n" itself.
        if "\n" in lines:
            del lines["\n"]
            print("Deleted \\n")

        jds_chars = {}
        for char, refs in lines.items():
            new_char = JdsChar.from_drama(char, drama.uid)
            new_char.add_line_refs(refs)
            jds_chars[char] = new_char

        run_time = time.perf_counter() - cur_start_time
        print("stop line_ref_worker for {} with {} chars in {}".format(
            drama.value, len(lines), run_time))
        return jds_chars

    def do_line_ref(self):
        """
        Build the line references of every drama that is not yet flagged
        with ``kanji_line_ref_ok == 1`` and push them to the DB.

        Dramas are processed by a pool of 4 threads in batches of
        ``BATCH_SIZE``; the result of each finished worker is pushed with
        ``push_chars_to_line``. An ``Error`` raised while handling a batch
        is reported through ``exception`` and processing continues with the
        next batch.
        """
        # Build the work list up front instead of removing items from the
        # list while iterating over it (which skips elements), and leave the
        # list returned by the DB layer untouched.
        pending = []
        for drama in self.db.get_all_dramas():
            # Value comparison: identity (`is`) on an int literal is
            # implementation-dependent.
            if drama.kanji_line_ref_ok == 1:
                print(
                    "kanji_line_ref_ok TRUE -> {} skipped".format(
                        drama.uid))
            else:
                pending.append(drama)

        with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
            for start in range(0, len(pending), self.BATCH_SIZE):
                batch = pending[start:start + self.BATCH_SIZE]
                try:
                    futures = [
                        executor.submit(self.line_ref_worker, drama)
                        for drama in batch
                    ]
                    for future in concurrent.futures.as_completed(futures):
                        self.db.push_chars_to_line(future.result())
                except Error as e:
                    exception(e)