def __init__(self, db: DatabaseHandler, snapshots_id: int): super().__init__() snapshots_id = int(snapshots_id) # Verify that topic exists if db.find_by_id(table='snapshots', object_id=snapshots_id) is None: raise McWord2vecException("Snapshot with ID %d does not exist." % snapshots_id) self.__snapshots_id = snapshots_id self.__sentence_counter = 0 # Subselect such as: # # SELECT sentence # FROM story_sentences # WHERE stories_id IN ( # SELECT stories_id # FROM snap.snapshots # WHERE snapshots_id = ... # ) # # or its variants (e.g. INNER JOIN) makes the query planner decide on a sequential scan on "story_sentences", # so we create a temporary table with snapshot's "stories_id" first. log.info("Creating a temporary table with snapshot's stories_id...") snapshots_stories_id_temp_table_name = 'snapshot_stories_ids_{}'.format( random_string(32)) db.query( """ CREATE TEMPORARY TABLE {} AS SELECT stories_id FROM snap.stories WHERE snapshots_id = %(snapshots_id)s """.format(snapshots_stories_id_temp_table_name), {'snapshots_id': snapshots_id}) # "INNER JOIN" instead of "WHERE stories_id IN (SELECT ...)" here because then database doesn't have to compute # distinct "stories_id" to SELECT sentence FROM story_sentences against, i.e. it doesn't have to # Group + HashAggregate on the temporary table. log.info("Creating COPY TO object...") self.__copy_to = db.copy_to(""" COPY ( SELECT story_sentences.sentence FROM {} AS snapshot_stories_ids INNER JOIN story_sentences ON snapshot_stories_ids.stories_id = story_sentences.stories_id ) TO STDOUT WITH CSV """.format(snapshots_stories_id_temp_table_name))
def __init__(self, db: DatabaseHandler, snapshots_id: int): super().__init__() snapshots_id = int(snapshots_id) # Verify that topic exists if db.find_by_id(table='snapshots', object_id=snapshots_id) is None: raise McWord2vecException("Snapshot with ID %d does not exist." % snapshots_id) self.__snapshots_id = snapshots_id self.__sentence_counter = 0 log.info("Creating COPY TO object...") self.__copy_to = db.copy_to(""" COPY ( SELECT story_sentences.sentence FROM snap.stories INNER JOIN story_sentences ON snap.stories.stories_id = story_sentences.stories_id WHERE snap.stories.snapshots_id = %d ) TO STDOUT WITH CSV """ % snapshots_id)