def _store_tree(root_node):
        """
        Replace the stored reading alternation tree with the given tree.

        Every row is deleted from ``reading_alt_kanjireading`` and from
        ``reading_alt_readingalternation``; then one row per node yielded by
        ``root_node.walk()`` is inserted into
        ``reading_alt_readingalternation`` using the ``connection`` in scope.

        ``root_node`` must provide ``walk()``, and each node it yields must
        expose ``label``, ``code``, ``probability``, ``left_visit`` and
        ``right_visit``.
        """
        def iter_results(tree):
            # Lazily build one row per node, in the column order of the
            # INSERT statement below.
            for node in tree.walk():
                yield (node.label, node.code, node.probability,
                       node.left_visit, node.right_visit)

        # Passed to groups_of_n as the group size (presumably the number of
        # rows handed to each executemany() call).
        max_per_run = 10000

        cursor = connection.cursor()
        try:
            # NOTE(review): reading_alt_kanjireading is emptied first,
            # presumably because its rows reference
            # reading_alt_readingalternation -- confirm against the schema.
            cursor.execute('DELETE FROM reading_alt_kanjireading')
            cursor.execute('DELETE FROM reading_alt_readingalternation')

            for results in groups_of_n(max_per_run, iter_results(root_node)):
                cursor.executemany(
                    """
                    INSERT INTO reading_alt_readingalternation 
                    (value, code, probability, left_visit, right_visit)
                    VALUES (%s, %s, %s, %s, %s)
                    """,
                    results,
                )
        finally:
            # Release the cursor even when a DELETE or INSERT raises.
            cursor.close()
    def _store_tree(root_node):
        """
        Replace the stored reading alternation tree with the given tree.

        Every row is deleted from ``reading_alt_kanjireading`` and from
        ``reading_alt_readingalternation``; then one row per node yielded by
        ``root_node.walk()`` is inserted into
        ``reading_alt_readingalternation`` using the ``connection`` in scope.

        ``root_node`` must provide ``walk()``, and each node it yields must
        expose ``label``, ``code``, ``probability``, ``left_visit`` and
        ``right_visit``.
        """
        def iter_results(tree):
            # Lazily build one row per node, in the column order of the
            # INSERT statement below.
            for node in tree.walk():
                yield (node.label, node.code, node.probability,
                       node.left_visit, node.right_visit)

        # Passed to groups_of_n as the group size (presumably the number of
        # rows handed to each executemany() call).
        max_per_run = 10000

        cursor = connection.cursor()
        try:
            # NOTE(review): reading_alt_kanjireading is emptied first,
            # presumably because its rows reference
            # reading_alt_readingalternation -- confirm against the schema.
            cursor.execute('DELETE FROM reading_alt_kanjireading')
            cursor.execute('DELETE FROM reading_alt_readingalternation')

            for results in groups_of_n(max_per_run, iter_results(root_node)):
                cursor.executemany(
                    """
                    INSERT INTO reading_alt_readingalternation 
                    (value, code, probability, left_visit, right_visit)
                    VALUES (%s, %s, %s, %s, %s)
                    """,
                    results,
                )
        finally:
            # Release the cursor even when a DELETE or INSERT raises.
            cursor.close()
    def _store_kanji_readings(alt_tree):
        """
        Store a separate table of only leaf-node readings.

        For every child of ``alt_tree`` (its ``label`` becomes the
        ``condition`` column) the leaves returned by ``walk_leaves()`` are
        grouped by ``label`` (the ``symbol`` column). A leaf's weight is
        ``exp`` of the summed ``probability`` values along
        ``get_ancestors()[1:]``; for each distinct reading only the
        heaviest leaf is kept. Weights are then normalised per kanji and
        emitted together with a running cumulative total.

        ``reading_alt_kanjireading`` is emptied first, rows are inserted in
        groups produced by ``groups_of_n``, and a ``COMMIT`` is issued after
        each group.

        Raises ``AssertionError`` if the cumulative total for a kanji does
        not end within ``1e-8`` of 1.0.
        """
        def iter_results(tree):
            # NOTE: itervalues()/iteritems() are the Python 2 dict API.
            for kanji_node in tree.children.itervalues():
                kanji = kanji_node.label

                # Heaviest leaf seen so far for each reading of this kanji.
                reading_map = {}
                for leaf_node in kanji_node.walk_leaves():
                    reading = leaf_node.label
                    leaf_path = leaf_node.get_ancestors()[1:]
                    pdf = math.exp(sum(n.probability for n in leaf_path))
                    best = reading_map.get(reading)
                    if best is None or best['pdf'] < pdf:
                        reading_map[reading] = {
                            'pdf': pdf,
                            'codes': set(n.code for n in leaf_path),
                            # Remember which path won, so the emitted row
                            # points at this reading's own leaf rather than
                            # at whichever leaf happened to be walked last.
                            'left_visit': leaf_path[-1].left_visit,
                        }

                if not reading_map:
                    # No readings for this kanji.
                    continue

                total = sum(r['pdf'] for r in reading_map.itervalues())
                cdf = 0.0
                for reading, entry in reading_map.iteritems():
                    pdf = entry['pdf'] / total
                    cdf += pdf
                    yield (kanji, reading, ''.join(sorted(entry['codes'])),
                           pdf, cdf, entry['left_visit'])
                # The normalised weights of one kanji must sum to 1.
                assert abs(cdf - 1.0) < 1e-8

        # Passed to groups_of_n as the group size (presumably the number of
        # rows handed to each executemany() call).
        max_per_insert = 10000

        # The statement is identical for every batch, so build it once.
        quoted_fields = tuple(
            connection.ops.quote_name(f) for f in [
                'condition', 'symbol', 'alternations', 'pdf', 'cdf',
                'reading_alternation_id'
            ])
        insert_sql = """
                    INSERT INTO reading_alt_kanjireading
                    (%s)
                    VALUES (%%s, %%s, %%s, %%s, %%s, %%s)
                    """ % ', '.join(quoted_fields)

        cursor = connection.cursor()
        try:
            cursor.execute('DELETE FROM reading_alt_kanjireading')
            for results in groups_of_n(max_per_insert, iter_results(alt_tree)):
                cursor.executemany(insert_sql, results)
                cursor.execute('COMMIT')
        finally:
            # Release the cursor even when a statement or the generator raises.
            cursor.close()
    def _store_kanji_readings(alt_tree):
        """
        Store a separate table of only leaf-node readings.

        For every child of ``alt_tree`` (its ``label`` becomes the
        ``condition`` column) the leaves returned by ``walk_leaves()`` are
        grouped by ``label`` (the ``symbol`` column). A leaf's weight is
        ``exp`` of the summed ``probability`` values along
        ``get_ancestors()[1:]``; for each distinct reading only the
        heaviest leaf is kept. Weights are then normalised per kanji and
        emitted together with a running cumulative total.

        ``reading_alt_kanjireading`` is emptied first, rows are inserted in
        groups produced by ``groups_of_n``, and a ``COMMIT`` is issued after
        each group.

        Raises ``AssertionError`` if the cumulative total for a kanji does
        not end within ``1e-8`` of 1.0.
        """
        def iter_results(tree):
            # NOTE: itervalues()/iteritems() are the Python 2 dict API.
            for kanji_node in tree.children.itervalues():
                kanji = kanji_node.label

                # Heaviest leaf seen so far for each reading of this kanji.
                reading_map = {}
                for leaf_node in kanji_node.walk_leaves():
                    reading = leaf_node.label
                    leaf_path = leaf_node.get_ancestors()[1:]
                    pdf = math.exp(sum(n.probability for n in leaf_path))
                    best = reading_map.get(reading)
                    if best is None or best['pdf'] < pdf:
                        reading_map[reading] = {
                            'pdf': pdf,
                            'codes': set(n.code for n in leaf_path),
                            # Remember which path won, so the emitted row
                            # points at this reading's own leaf rather than
                            # at whichever leaf happened to be walked last.
                            'left_visit': leaf_path[-1].left_visit,
                        }

                if not reading_map:
                    # No readings for this kanji.
                    continue

                total = sum(r['pdf'] for r in reading_map.itervalues())
                cdf = 0.0
                for reading, entry in reading_map.iteritems():
                    pdf = entry['pdf'] / total
                    cdf += pdf
                    yield (kanji, reading, ''.join(sorted(entry['codes'])),
                           pdf, cdf, entry['left_visit'])
                # The normalised weights of one kanji must sum to 1.
                assert abs(cdf - 1.0) < 1e-8

        # Passed to groups_of_n as the group size (presumably the number of
        # rows handed to each executemany() call).
        max_per_insert = 10000

        # The statement is identical for every batch, so build it once.
        quoted_fields = tuple(connection.ops.quote_name(f) for f in
            ['condition', 'symbol', 'alternations', 'pdf', 'cdf',
            'reading_alternation_id'])
        insert_sql = """
                    INSERT INTO reading_alt_kanjireading
                    (%s)
                    VALUES (%%s, %%s, %%s, %%s, %%s, %%s)
                    """ % ', '.join(quoted_fields)

        cursor = connection.cursor()
        try:
            cursor.execute('DELETE FROM reading_alt_kanjireading')
            for results in groups_of_n(max_per_insert, iter_results(alt_tree)):
                cursor.executemany(insert_sql, results)
                cursor.execute('COMMIT')
        finally:
            # Release the cursor even when a statement or the generator raises.
            cursor.close()
Beispiel #5
0
    def from_dist(cls, prob_dist):
        """
        Replace the contents of ``cls``'s table with rows from ``prob_dist``.

        One ``(symbol, pdf, cdf)`` row is written per item of
        ``prob_dist.samples()``: ``pdf`` is ``prob_dist.freq(symbol)`` and
        ``cdf`` is the running sum of the ``pdf`` values in iteration order.

        ``cls`` must expose ``_meta.db_table`` (presumably a Django model
        class -- confirm against the enclosing class).
        """
        quote = connection.ops.quote_name
        # Quote identifiers through the backend instead of hard-coding
        # backticks, and use the same quoting for DELETE and INSERT.
        table_name = quote(cls._meta.db_table)
        insert_sql = 'INSERT INTO %s (%s) VALUES (%%s, %%s, %%s)' % (
            table_name,
            ', '.join(quote(f) for f in ('symbol', 'pdf', 'cdf')),
        )

        # Build every row before touching the table, so a failure while
        # reading the distribution does not leave the table emptied.
        rows = []
        cdf = 0.0
        for symbol in prob_dist.samples():
            pdf = prob_dist.freq(symbol)
            cdf += pdf
            rows.append((symbol, pdf, cdf))

        cursor = connection.cursor()
        try:
            cursor.execute('DELETE FROM %s' % table_name)
            for row_set in groups_of_n(N_ROWS_PER_INSERT, rows):
                cursor.executemany(insert_sql, row_set)
        finally:
            # Release the cursor even when a statement raises.
            cursor.close()
Beispiel #6
0
    def from_dist(cls, prob_dist):
        """
        Replace the contents of ``cls``'s table with rows from ``prob_dist``.

        One ``(symbol, pdf, cdf)`` row is written per item of
        ``prob_dist.samples()``: ``pdf`` is ``prob_dist.freq(symbol)`` and
        ``cdf`` is the running sum of the ``pdf`` values in iteration order.

        ``cls`` must expose ``_meta.db_table`` (presumably a Django model
        class -- confirm against the enclosing class).
        """
        quote = connection.ops.quote_name
        # Quote identifiers through the backend instead of hard-coding
        # backticks, and use the same quoting for DELETE and INSERT.
        table_name = quote(cls._meta.db_table)
        insert_sql = 'INSERT INTO %s (%s) VALUES (%%s, %%s, %%s)' % (
            table_name,
            ', '.join(quote(f) for f in ('symbol', 'pdf', 'cdf')),
        )

        # Build every row before touching the table, so a failure while
        # reading the distribution does not leave the table emptied.
        rows = []
        cdf = 0.0
        for symbol in prob_dist.samples():
            pdf = prob_dist.freq(symbol)
            cdf += pdf
            rows.append((symbol, pdf, cdf))

        cursor = connection.cursor()
        try:
            cursor.execute('DELETE FROM %s' % table_name)
            for row_set in groups_of_n(N_ROWS_PER_INSERT, rows):
                cursor.executemany(insert_sql, row_set)
        finally:
            # Release the cursor even when a statement raises.
            cursor.close()