Exemple #1
0
    def __init__(self, db: DatabaseHandler, snapshots_id: int):
        """Set up a COPY TO object that selects the sentences of a snapshot's stories.

        The snapshot's "stories_id" values are first copied into a uniquely named temporary table, and the COPY
        query then joins that table against "story_sentences".

        :param db: Database handler used for the existence check, the temporary table and the COPY TO object.
        :param snapshots_id: Snapshot ID; coerced with int() before being used.
        :raises McWord2vecException: If find_by_id() on the "snapshots" table returns None for the ID.
        """
        super().__init__()

        snapshots_id = int(snapshots_id)

        # Verify that the snapshot exists before doing any heavier work
        snapshot = db.find_by_id(table='snapshots', object_id=snapshots_id)
        if snapshot is None:
            raise McWord2vecException("Snapshot with ID %d does not exist." % snapshots_id)

        self.__snapshots_id = snapshots_id
        self.__sentence_counter = 0

        # A subselect such as:
        #
        #     SELECT sentence
        #     FROM story_sentences
        #     WHERE stories_id IN (
        #         SELECT stories_id
        #         FROM snap.stories
        #         WHERE snapshots_id = ...
        #     )
        #
        # or its variants (e.g. INNER JOIN) makes the query planner decide on a sequential scan on
        # "story_sentences", so the snapshot's "stories_id" go into a temporary table first.
        log.info("Creating a temporary table with snapshot's stories_id...")

        # Random suffix keeps the temporary table name unique
        table_suffix = random_string(32)
        temp_table = 'snapshot_stories_ids_{}'.format(table_suffix)

        create_table_sql = """
            CREATE TEMPORARY TABLE {} AS
                SELECT stories_id
                FROM snap.stories
                WHERE snapshots_id = %(snapshots_id)s
        """.format(temp_table)
        db.query(create_table_sql, {'snapshots_id': snapshots_id})

        # "INNER JOIN" rather than "WHERE stories_id IN (SELECT ...)": the database then doesn't have to compute
        # distinct "stories_id" to SELECT sentence FROM story_sentences against, i.e. it doesn't have to
        # Group + HashAggregate on the temporary table.
        log.info("Creating COPY TO object...")
        copy_sql = """
            COPY (
                SELECT story_sentences.sentence
                FROM {} AS snapshot_stories_ids
                    INNER JOIN story_sentences
                        ON snapshot_stories_ids.stories_id = story_sentences.stories_id
            ) TO STDOUT WITH CSV
        """.format(temp_table)
        self.__copy_to = db.copy_to(copy_sql)
    def __init__(self, db: DatabaseHandler, snapshots_id: int):
        """Set up a COPY TO object that selects the sentences of a snapshot's stories.

        The COPY query joins "snap.stories" against "story_sentences" and filters by the snapshot ID.

        :param db: Database handler used for the existence check and for creating the COPY TO object.
        :param snapshots_id: Snapshot ID; coerced with int() before being used.
        :raises McWord2vecException: If find_by_id() on the "snapshots" table returns None for the ID.
        """
        super().__init__()

        snapshots_id = int(snapshots_id)

        # Verify that the snapshot exists
        snapshot = db.find_by_id(table='snapshots', object_id=snapshots_id)
        if snapshot is None:
            raise McWord2vecException("Snapshot with ID %d does not exist." % snapshots_id)

        self.__sentence_counter = 0
        self.__snapshots_id = snapshots_id

        log.info("Creating COPY TO object...")

        # The ID was coerced to int above, so "%d" interpolation only ever inserts a number into the query
        copy_sql = """
            COPY (
                SELECT story_sentences.sentence
                FROM snap.stories
                    INNER JOIN story_sentences
                        ON snap.stories.stories_id = story_sentences.stories_id
                WHERE snap.stories.snapshots_id = %d
            ) TO STDOUT WITH CSV
        """ % snapshots_id
        self.__copy_to = db.copy_to(copy_sql)