def Generate(self, file_service_id, tag_service_id, populate_from_storage=True, status_hook=None):
    """
    Create the specific display mappings cache tables for this ( file service,
    tag service ) pair, optionally seed them from the specific storage mappings
    cache, and then build their indices.

    Order matters here: tables are created first, data is bulk-copied before
    any index exists (faster inserts), counts are created, and indices last.

    :param populate_from_storage: if True, copy current/pending rows from the storage cache
    :param status_hook: optional callable taking a status string for UI feedback
    """

    table_generation_dict = self._GetServiceTableGenerationDictSingle(file_service_id, tag_service_id)

    for (table_name, (create_query_without_name, version_added)) in table_generation_dict.items():

        self._Execute(create_query_without_name.format(table_name))

    if populate_from_storage:

        if status_hook is not None:

            status_hook('copying storage')

        (cache_current_mappings_table_name, cache_deleted_mappings_table_name, cache_pending_mappings_table_name) = ClientDBMappingsStorage.GenerateSpecificMappingsCacheTableNames(file_service_id, tag_service_id)
        (cache_display_current_mappings_table_name, cache_display_pending_mappings_table_name) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(file_service_id, tag_service_id)

        # start the display tables as a straight copy of the storage cache rows
        self._Execute('INSERT OR IGNORE INTO {} ( hash_id, tag_id ) SELECT hash_id, tag_id FROM {};'.format(cache_display_current_mappings_table_name, cache_current_mappings_table_name))
        self._Execute('INSERT OR IGNORE INTO {} ( hash_id, tag_id ) SELECT hash_id, tag_id FROM {};'.format(cache_display_pending_mappings_table_name, cache_pending_mappings_table_name))

    self.modules_mappings_counts.CreateTables(ClientTags.TAG_DISPLAY_ACTUAL, file_service_id, tag_service_id, populate_from_storage=populate_from_storage)

    if status_hook is not None:

        status_hook('optimising data')

    # indices are created after the bulk copy, which is much faster than
    # inserting into already-indexed tables
    index_generation_dict = self._GetServiceIndexGenerationDictSingle(file_service_id, tag_service_id)

    for (table_name, columns, unique, version_added) in self._FlattenIndexGenerationDict(index_generation_dict):

        self._CreateIndex(table_name, columns, unique=unique)
def DeleteFiles(self, file_service_id, tag_service_id, hash_ids, hash_id_table_name):
    """
    Remove the given files from this specific display cache, reducing the
    actual-tag autocomplete counts by the current/pending rows removed.

    hash_id_table_name is a temp integer table already populated with hash_ids.
    """

    (cache_display_current_mappings_table_name, cache_display_pending_mappings_table_name) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(file_service_id, tag_service_id)

    # temp hashes to mappings
    current_rows = self._Execute('SELECT tag_id, hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.format(hash_id_table_name, cache_display_current_mappings_table_name)).fetchall()

    # temp hashes to mappings
    pending_rows = self._Execute('SELECT tag_id, hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.format(hash_id_table_name, cache_display_pending_mappings_table_name)).fetchall()

    tag_ids_to_current_hash_ids = HydrusData.BuildKeyToSetDict(current_rows)
    tag_ids_to_pending_hash_ids = HydrusData.BuildKeyToSetDict(pending_rows)

    all_tag_ids = set(tag_ids_to_current_hash_ids.keys()).union(tag_ids_to_pending_hash_ids.keys())

    # snapshot the count deltas before we delete the rows
    counts_cache_changes = [
        (tag_id, len(tag_ids_to_current_hash_ids[tag_id]), len(tag_ids_to_pending_hash_ids[tag_id]))
        for tag_id in all_tag_ids
    ]

    for table_name in (cache_display_current_mappings_table_name, cache_display_pending_mappings_table_name):

        self._ExecuteMany('DELETE FROM {} WHERE hash_id = ?;'.format(table_name), ((hash_id,) for hash_id in hash_ids))

    if len(counts_cache_changes) > 0:

        self.modules_mappings_counts_update.ReduceCounts(ClientTags.TAG_DISPLAY_ACTUAL, file_service_id, tag_service_id, counts_cache_changes)
def RegeneratePending(self, tag_service_id, status_hook=None):
    """
    Rebuild the combined-files pending autocomplete counts for this tag
    service from the raw pending mappings table, then cascade the regen into
    the combined-files display cache.

    :param status_hook: optional callable taking a status string for UI feedback
    """

    (current_mappings_table_name, deleted_mappings_table_name, pending_mappings_table_name, petitioned_mappings_table_name) = ClientDBMappingsStorage.GenerateMappingsTableNames(tag_service_id)

    if status_hook is not None:

        message = 'clearing old combined display data'
        status_hook(message)

    all_pending_storage_tag_ids = self._STS(self._Execute('SELECT DISTINCT tag_id FROM {};'.format(pending_mappings_table_name)))

    # only the pending side of the counts is wiped; current counts survive
    self.modules_mappings_counts.ClearCounts(ClientTags.TAG_DISPLAY_STORAGE, self.modules_services.combined_file_service_id, tag_service_id, keep_current=True)

    counts_cache_changes = []

    num_to_do = len(all_pending_storage_tag_ids)

    for (i, storage_tag_id) in enumerate(all_pending_storage_tag_ids):

        if i % 100 == 0 and status_hook is not None:

            message = 'regenerating pending tags {}'.format(HydrusData.ConvertValueRangeToPrettyString(i + 1, num_to_do))
            status_hook(message)

        # count distinct files per tag, guarding against any duplicate rows
        (pending_delta,) = self._Execute('SELECT COUNT( DISTINCT hash_id ) FROM {} WHERE tag_id = ?;'.format(pending_mappings_table_name), (storage_tag_id,)).fetchone()

        counts_cache_changes.append((storage_tag_id, 0, pending_delta))

    self.modules_mappings_counts_update.AddCounts(ClientTags.TAG_DISPLAY_STORAGE, self.modules_services.combined_file_service_id, tag_service_id, counts_cache_changes)

    self.modules_mappings_cache_combined_files_display.RegeneratePending(tag_service_id, status_hook=status_hook)
def Drop(self, file_service_id, tag_service_id):
    """
    Drop both specific display cache tables for this ( file service,
    tag service ) pair, along with their actual-tag count tables.
    """

    display_table_names = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(file_service_id, tag_service_id)

    # current table first, then pending
    for table_name in display_table_names:

        self._Execute('DROP TABLE IF EXISTS {};'.format(table_name))

    self.modules_mappings_counts.DropTables(ClientTags.TAG_DISPLAY_ACTUAL, file_service_id, tag_service_id)
def Clear(self, file_service_id, tag_service_id, keep_pending=False):
    """
    Empty the specific display cache for this ( file service, tag service )
    pair. When keep_pending is True, pending rows and pending counts survive.
    """

    (current_table_name, pending_table_name) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(file_service_id, tag_service_id)

    self._Execute('DELETE FROM {};'.format(current_table_name))

    if not keep_pending:

        self._Execute('DELETE FROM {};'.format(pending_table_name))

    self.modules_mappings_counts.ClearCounts(ClientTags.TAG_DISPLAY_ACTUAL, file_service_id, tag_service_id, keep_pending=keep_pending)
def _GetServiceTableGenerationDictSingle(self, file_service_id, tag_service_id):
    """
    Return a dict mapping each display cache table name for this service pair
    to its ( unnamed CREATE query, db version the table was added ) pair.
    """

    (current_table_name, pending_table_name) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(file_service_id, tag_service_id)

    # combined local media cache tables arrived later than the per-service ones
    if file_service_id == self.modules_services.combined_local_media_service_id:

        version = 486

    else:

        version = 400

    create_query = 'CREATE TABLE IF NOT EXISTS {} ( hash_id INTEGER, tag_id INTEGER, PRIMARY KEY ( hash_id, tag_id ) ) WITHOUT ROWID;'

    return {
        current_table_name: (create_query, version),
        pending_table_name: (create_query, version)
    }
def _GetServiceIndexGenerationDictSingle(self, file_service_id, tag_service_id):
    """
    Return a dict mapping each display cache table name for this service pair
    to its list of ( columns, unique, db version the index was added ) entries.
    """

    (current_table_name, pending_table_name) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(file_service_id, tag_service_id)

    # combined local media cache indices arrived later than the per-service ones
    if file_service_id == self.modules_services.combined_local_media_service_id:

        version = 486

    else:

        version = 400

    # the reversed ( tag_id, hash_id ) index supports tag-first lookups
    return {
        current_table_name: [(['tag_id', 'hash_id'], True, version)],
        pending_table_name: [(['tag_id', 'hash_id'], True, version)]
    }
def AddMappings(self, file_service_id, tag_service_id, tag_id, hash_ids):
    """
    Add current display rows for every tag implied by tag_id, then bump the
    actual-tag counts by however many rows were genuinely new.
    """

    # this guy doesn't do rescind pend because of storage calculation issues that need that to occur before deletes to storage tables

    (cache_display_current_mappings_table_name, cache_display_pending_mappings_table_name) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(file_service_id, tag_service_id)

    implied_tag_ids = self.modules_tag_display.GetImplies(ClientTags.TAG_DISPLAY_ACTUAL, tag_service_id, tag_id)

    ac_counts = collections.Counter()

    for implied_tag_id in implied_tag_ids:

        self._ExecuteMany('INSERT OR IGNORE INTO {} ( hash_id, tag_id ) VALUES ( ?, ? );'.format(cache_display_current_mappings_table_name), ((hash_id, implied_tag_id) for hash_id in hash_ids))

        num_added = self._GetRowCount()

        if num_added > 0:

            ac_counts[implied_tag_id] += num_added

    if len(ac_counts) > 0:

        counts_cache_changes = [(added_tag_id, current_delta, 0) for (added_tag_id, current_delta) in ac_counts.items()]

        self.modules_mappings_counts_update.AddCounts(ClientTags.TAG_DISPLAY_ACTUAL, file_service_id, tag_service_id, counts_cache_changes)
def PendMappings(self, file_service_id, tag_service_id, tag_id, hash_ids):
    """
    Add pending display rows for every tag implied by tag_id, then bump the
    actual-tag pending counts by however many rows were genuinely new.
    """

    (cache_display_current_mappings_table_name, cache_display_pending_mappings_table_name) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(file_service_id, tag_service_id)

    ac_counts = collections.Counter()

    implied_tag_ids = self.modules_tag_display.GetImplies(ClientTags.TAG_DISPLAY_ACTUAL, tag_service_id, tag_id)

    for implied_tag_id in implied_tag_ids:

        self._ExecuteMany('INSERT OR IGNORE INTO {} ( hash_id, tag_id ) VALUES ( ?, ? );'.format(cache_display_pending_mappings_table_name), ((hash_id, implied_tag_id) for hash_id in hash_ids))

        num_added = self._GetRowCount()

        if num_added > 0:

            ac_counts[implied_tag_id] += num_added

    if len(ac_counts) > 0:

        counts_cache_changes = [(pended_tag_id, 0, pending_delta) for (pended_tag_id, pending_delta) in ac_counts.items()]

        self.modules_mappings_counts_update.AddCounts(ClientTags.TAG_DISPLAY_ACTUAL, file_service_id, tag_service_id, counts_cache_changes)
def AddImplications(self, file_service_id, tag_service_id, implication_tag_ids, tag_id, status_hook=None):
    """
    tag_id has gained new implications: for every file with a current/pending
    storage mapping to any of implication_tag_ids, add a display mapping to
    tag_id, then add the resulting count deltas.

    :param status_hook: accepted for interface consistency; not used here
    """

    if len(implication_tag_ids) == 0:

        return

    (cache_current_mappings_table_name, cache_deleted_mappings_table_name, cache_pending_mappings_table_name) = ClientDBMappingsStorage.GenerateSpecificMappingsCacheTableNames(file_service_id, tag_service_id)
    (cache_display_current_mappings_table_name, cache_display_pending_mappings_table_name) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(file_service_id, tag_service_id)

    statuses_to_count_delta = collections.Counter()

    # weights tell us which implication tags actually have any rows, letting us skip empty work
    (current_implication_tag_ids, current_implication_tag_ids_weight, pending_implication_tag_ids, pending_implication_tag_ids_weight) = self.modules_mappings_counts.GetCurrentPendingPositiveCountsAndWeights(ClientTags.TAG_DISPLAY_STORAGE, file_service_id, tag_service_id, implication_tag_ids)

    jobs = []

    jobs.append((HC.CONTENT_STATUS_CURRENT, cache_display_current_mappings_table_name, cache_current_mappings_table_name, current_implication_tag_ids, current_implication_tag_ids_weight))
    jobs.append((HC.CONTENT_STATUS_PENDING, cache_display_pending_mappings_table_name, cache_pending_mappings_table_name, pending_implication_tag_ids, pending_implication_tag_ids_weight))

    for (status, cache_display_mappings_table_name, cache_mappings_table_name, add_tag_ids, add_tag_ids_weight) in jobs:

        if add_tag_ids_weight == 0:

            # nothing to actually add, so nbd
            continue

        if len(add_tag_ids) == 1:

            # single implication tag: a simple WHERE beats a temp-table join
            (add_tag_id,) = add_tag_ids

            self._Execute('INSERT OR IGNORE INTO {} ( hash_id, tag_id ) SELECT hash_id, ? FROM {} WHERE tag_id = ?;'.format(cache_display_mappings_table_name, cache_mappings_table_name), (tag_id, add_tag_id))

            statuses_to_count_delta[status] = self._GetRowCount()

        else:

            with self._MakeTemporaryIntegerTable(add_tag_ids, 'tag_id') as temp_tag_ids_table_name:

                # for all new implications, get files with those tags and not existing
                self._Execute('INSERT OR IGNORE INTO {} ( hash_id, tag_id ) SELECT hash_id, ? FROM {} CROSS JOIN {} USING ( tag_id );'.format(cache_display_mappings_table_name, temp_tag_ids_table_name, cache_mappings_table_name), (tag_id,))

                statuses_to_count_delta[status] = self._GetRowCount()

    current_delta = statuses_to_count_delta[HC.CONTENT_STATUS_CURRENT]
    pending_delta = statuses_to_count_delta[HC.CONTENT_STATUS_PENDING]

    if current_delta > 0 or pending_delta > 0:

        counts_cache_changes = ((tag_id, current_delta, pending_delta),)

        self.modules_mappings_counts_update.AddCounts(ClientTags.TAG_DISPLAY_ACTUAL, file_service_id, tag_service_id, counts_cache_changes)
def DeleteImplications(self, file_service_id, tag_service_id, implication_tag_ids, tag_id, status_hook=None):
    """
    tag_id has lost some implications: delete display mappings to tag_id for
    files that only earned them via implication_tag_ids and have no storage
    mapping to any remaining implication, then reduce the count deltas.

    :param status_hook: accepted for interface consistency; not used here
    """

    if len(implication_tag_ids) == 0:

        return

    statuses_to_count_delta = collections.Counter()

    (cache_current_mappings_table_name, cache_deleted_mappings_table_name, cache_pending_mappings_table_name) = ClientDBMappingsStorage.GenerateSpecificMappingsCacheTableNames(file_service_id, tag_service_id)
    (cache_display_current_mappings_table_name, cache_display_pending_mappings_table_name) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(file_service_id, tag_service_id)

    # tags that still imply tag_id after the removal; their files must keep their display rows
    remaining_implication_tag_ids = set(self.modules_tag_display.GetImpliedBy(ClientTags.TAG_DISPLAY_ACTUAL, tag_service_id, tag_id)).difference(implication_tag_ids)

    (current_implication_tag_ids, current_implication_tag_ids_weight, pending_implication_tag_ids, pending_implication_tag_ids_weight) = self.modules_mappings_counts.GetCurrentPendingPositiveCountsAndWeights(ClientTags.TAG_DISPLAY_STORAGE, file_service_id, tag_service_id, implication_tag_ids)
    (current_remaining_implication_tag_ids, current_remaining_implication_tag_ids_weight, pending_remaining_implication_tag_ids, pending_remaining_implication_tag_ids_weight) = self.modules_mappings_counts.GetCurrentPendingPositiveCountsAndWeights(ClientTags.TAG_DISPLAY_STORAGE, file_service_id, tag_service_id, remaining_implication_tag_ids)

    jobs = []

    jobs.append((HC.CONTENT_STATUS_CURRENT, cache_display_current_mappings_table_name, cache_current_mappings_table_name, current_implication_tag_ids, current_implication_tag_ids_weight, current_remaining_implication_tag_ids, current_remaining_implication_tag_ids_weight))
    jobs.append((HC.CONTENT_STATUS_PENDING, cache_display_pending_mappings_table_name, cache_pending_mappings_table_name, pending_implication_tag_ids, pending_implication_tag_ids_weight, pending_remaining_implication_tag_ids, pending_remaining_implication_tag_ids_weight))

    for (status, cache_display_mappings_table_name, cache_mappings_table_name, removee_tag_ids, removee_tag_ids_weight, keep_tag_ids, keep_tag_ids_weight) in jobs:

        if removee_tag_ids_weight == 0:

            # nothing to remove, so nothing to do!
            continue

        # ultimately here, we are doing "delete all display mappings with hash_ids that have a storage mapping for a removee tag and no storage mappings for a keep tag
        # in order to reduce overhead, we go full meme and do a bunch of different situations

        with self._MakeTemporaryIntegerTable([], 'tag_id') as temp_removee_tag_ids_table_name:

            with self._MakeTemporaryIntegerTable([], 'tag_id') as temp_keep_tag_ids_table_name:

                if len(removee_tag_ids) == 1:

                    (removee_tag_id,) = removee_tag_ids

                    hash_id_in_storage_remove = 'hash_id IN ( SELECT hash_id FROM {} WHERE tag_id = {} )'.format(cache_mappings_table_name, removee_tag_id)

                else:

                    self._ExecuteMany('INSERT INTO {} ( tag_id ) VALUES ( ? );'.format(temp_removee_tag_ids_table_name), ((removee_tag_id,) for removee_tag_id in removee_tag_ids))

                    hash_id_in_storage_remove = 'hash_id IN ( SELECT DISTINCT hash_id FROM {} CROSS JOIN {} USING ( tag_id ) )'.format(temp_removee_tag_ids_table_name, cache_mappings_table_name)

                if keep_tag_ids_weight == 0:

                    # no remaining implications, so any removee-tagged file loses its display row
                    predicates_phrase = hash_id_in_storage_remove

                else:

                    # WARNING, WARNING: Big Brain Query, potentially great/awful
                    # note that in the 'clever/file join' situation, the number of total mappings is many, but we are deleting a few
                    # we want to precisely scan the status of the potential hashes to delete, not scan through them all to see what not to do
                    # therefore, we do NOT EXISTS, which just scans the parts, rather than NOT IN, which does the whole query and then checks against all results

                    if len(keep_tag_ids) == 1:

                        (keep_tag_id,) = keep_tag_ids

                        if ClientDBMappingsStorage.DoingAFileJoinTagSearchIsFaster(removee_tag_ids_weight, keep_tag_ids_weight):

                            hash_id_not_in_storage_keep = 'NOT EXISTS ( SELECT 1 FROM {} WHERE {}.hash_id = {}.hash_id and tag_id = {} )'.format(cache_mappings_table_name, cache_display_mappings_table_name, cache_mappings_table_name, keep_tag_id)

                        else:

                            hash_id_not_in_storage_keep = 'hash_id NOT IN ( SELECT hash_id FROM {} WHERE tag_id = {} )'.format(cache_mappings_table_name, keep_tag_id)

                    else:

                        self._ExecuteMany('INSERT INTO {} ( tag_id ) VALUES ( ? );'.format(temp_keep_tag_ids_table_name), ((keep_tag_id,) for keep_tag_id in keep_tag_ids))

                        if ClientDBMappingsStorage.DoingAFileJoinTagSearchIsFaster(removee_tag_ids_weight, keep_tag_ids_weight):

                            # (files to) mappings to temp tags
                            hash_id_not_in_storage_keep = 'NOT EXISTS ( SELECT 1 FROM {} CROSS JOIN {} USING ( tag_id ) WHERE {}.hash_id = {}.hash_id )'.format(cache_mappings_table_name, temp_keep_tag_ids_table_name, cache_display_mappings_table_name, cache_mappings_table_name)

                        else:

                            # temp tags to mappings
                            hash_id_not_in_storage_keep = ' hash_id NOT IN ( SELECT DISTINCT hash_id FROM {} CROSS JOIN {} USING ( tag_id ) )'.format(temp_keep_tag_ids_table_name, cache_mappings_table_name)

                    predicates_phrase = '{} AND {}'.format(hash_id_in_storage_remove, hash_id_not_in_storage_keep)

                query = 'DELETE FROM {} WHERE tag_id = {} AND {};'.format(cache_display_mappings_table_name, tag_id, predicates_phrase)

                self._Execute(query)

                statuses_to_count_delta[status] = self._GetRowCount()

    current_delta = statuses_to_count_delta[HC.CONTENT_STATUS_CURRENT]
    pending_delta = statuses_to_count_delta[HC.CONTENT_STATUS_PENDING]

    if current_delta > 0 or pending_delta > 0:

        counts_cache_changes = ((tag_id, current_delta, pending_delta),)

        self.modules_mappings_counts_update.ReduceCounts(ClientTags.TAG_DISPLAY_ACTUAL, file_service_id, tag_service_id, counts_cache_changes)
def AddMappings(self, tag_service_id, tag_id, hash_ids, filtered_hashes_generator: FilteredHashesGenerator):
    """
    Commit ( tag_id, hash_id ) mappings into each relevant specific storage
    cache: rescind any pending row, insert the current row, clear any deleted
    row, and keep the storage counts and the display cache in step.
    """

    for (file_service_id, filtered_hash_ids) in filtered_hashes_generator.IterateHashes(hash_ids):

        (cache_current_mappings_table_name, cache_deleted_mappings_table_name, cache_pending_mappings_table_name) = ClientDBMappingsStorage.GenerateSpecificMappingsCacheTableNames(file_service_id, tag_service_id)

        # we have to interleave this into the iterator so that if two siblings with the same ideal are pend->currented at once, we remain logic consistent for soletag lookups!
        self.modules_mappings_cache_specific_display.RescindPendingMappings(file_service_id, tag_service_id, tag_id, filtered_hash_ids)

        pairs = [(hash_id, tag_id) for hash_id in filtered_hash_ids]

        self._ExecuteMany('DELETE FROM {} WHERE hash_id = ? AND tag_id = ?;'.format(cache_pending_mappings_table_name), pairs)

        num_pending_rescinded = self._GetRowCount()

        #

        self._ExecuteMany('INSERT OR IGNORE INTO {} ( hash_id, tag_id ) VALUES ( ?, ? );'.format(cache_current_mappings_table_name), pairs)

        num_current_inserted = self._GetRowCount()

        #

        self._ExecuteMany('DELETE FROM {} WHERE hash_id = ? AND tag_id = ?;'.format(cache_deleted_mappings_table_name), pairs)

        if num_current_inserted > 0:

            self.modules_mappings_counts_update.AddCounts(ClientTags.TAG_DISPLAY_STORAGE, file_service_id, tag_service_id, [(tag_id, num_current_inserted, 0)])

        if num_pending_rescinded > 0:

            self.modules_mappings_counts_update.ReduceCounts(ClientTags.TAG_DISPLAY_STORAGE, file_service_id, tag_service_id, [(tag_id, 0, num_pending_rescinded)])

        self.modules_mappings_cache_specific_display.AddMappings(file_service_id, tag_service_id, tag_id, filtered_hash_ids)
def RegeneratePending(self, file_service_id, tag_service_id, status_hook=None):
    """
    Rebuild this specific storage cache's pending table and pending counts
    from the raw pending mappings, limited to files in this file domain, then
    cascade the regen into the specific display cache.
    """

    (current_mappings_table_name, deleted_mappings_table_name, pending_mappings_table_name, petitioned_mappings_table_name) = ClientDBMappingsStorage.GenerateMappingsTableNames(tag_service_id)
    (cache_current_mappings_table_name, cache_deleted_mappings_table_name, cache_pending_mappings_table_name) = ClientDBMappingsStorage.GenerateSpecificMappingsCacheTableNames(file_service_id, tag_service_id)

    if status_hook is not None:

        status_hook('clearing old specific data')

    all_pending_storage_tag_ids = self._STS(self._Execute('SELECT DISTINCT tag_id FROM {};'.format(pending_mappings_table_name)))

    # only the pending side of the counts is wiped; current counts survive
    self.modules_mappings_counts.ClearCounts(ClientTags.TAG_DISPLAY_STORAGE, file_service_id, tag_service_id, keep_current=True)

    self._Execute('DELETE FROM {};'.format(cache_pending_mappings_table_name))

    counts_cache_changes = []

    num_to_do = len(all_pending_storage_tag_ids)

    # join that restricts the raw pending mappings to files current in this file domain
    select_table_join = self.modules_files_storage.GetTableJoinLimitedByFileDomain(file_service_id, pending_mappings_table_name, HC.CONTENT_STATUS_CURRENT)

    for (i, storage_tag_id) in enumerate(all_pending_storage_tag_ids):

        if i % 100 == 0 and status_hook is not None:

            status_hook('regenerating pending tags {}'.format(HydrusData.ConvertValueRangeToPrettyString(i + 1, num_to_do)))

        self._Execute('INSERT OR IGNORE INTO {} ( tag_id, hash_id ) SELECT tag_id, hash_id FROM {} WHERE tag_id = ?;'.format(cache_pending_mappings_table_name, select_table_join), (storage_tag_id,))

        counts_cache_changes.append((storage_tag_id, 0, self._GetRowCount()))

    self.modules_mappings_counts_update.AddCounts(ClientTags.TAG_DISPLAY_STORAGE, file_service_id, tag_service_id, counts_cache_changes)

    self.modules_mappings_cache_specific_display.RegeneratePending(file_service_id, tag_service_id, status_hook=status_hook)
def PendMappings(self, tag_service_id, tag_id, hash_ids, filtered_hashes_generator: FilteredHashesGenerator):
    """
    Pend ( tag_id, hash_id ) mappings into each relevant specific storage
    cache, bumping pending counts and cascading into the display cache.
    """

    for (file_service_id, filtered_hash_ids) in filtered_hashes_generator.IterateHashes(hash_ids):

        (cache_current_mappings_table_name, cache_deleted_mappings_table_name, cache_pending_mappings_table_name) = ClientDBMappingsStorage.GenerateSpecificMappingsCacheTableNames(file_service_id, tag_service_id)

        self._ExecuteMany('INSERT OR IGNORE INTO {} ( hash_id, tag_id ) VALUES ( ?, ? );'.format(cache_pending_mappings_table_name), ((hash_id, tag_id) for hash_id in filtered_hash_ids))

        num_added = self._GetRowCount()

        if num_added > 0:

            self.modules_mappings_counts_update.AddCounts(ClientTags.TAG_DISPLAY_STORAGE, file_service_id, tag_service_id, [(tag_id, 0, num_added)])

        self.modules_mappings_cache_specific_display.PendMappings(file_service_id, tag_service_id, tag_id, filtered_hash_ids)
def RescindPendingMappings(self, tag_service_id, tag_id, hash_ids, filtered_hashes_generator: FilteredHashesGenerator):
    """
    Rescind pending ( tag_id, hash_id ) mappings from each relevant specific
    storage cache, reducing pending counts for rows actually removed.

    Fix: removed the dead local `ac_counts = collections.Counter()`, which was
    created each loop iteration and never read.
    """

    for (file_service_id, filtered_hash_ids) in filtered_hashes_generator.IterateHashes(hash_ids):

        (cache_current_mappings_table_name, cache_deleted_mappings_table_name, cache_pending_mappings_table_name) = ClientDBMappingsStorage.GenerateSpecificMappingsCacheTableNames(file_service_id, tag_service_id)

        # NOTE(review): display rescind goes before the storage delete, mirroring
        # the interleave ordering used in AddMappings — presumably the display
        # logic needs to see the storage row while it works; confirm before reordering
        self.modules_mappings_cache_specific_display.RescindPendingMappings(file_service_id, tag_service_id, tag_id, filtered_hash_ids)

        self._ExecuteMany('DELETE FROM ' + cache_pending_mappings_table_name + ' WHERE hash_id = ? AND tag_id = ?;', ((hash_id, tag_id) for hash_id in filtered_hash_ids))

        num_deleted = self._GetRowCount()

        if num_deleted > 0:

            counts_cache_changes = [(tag_id, 0, num_deleted)]

            self.modules_mappings_counts_update.ReduceCounts(ClientTags.TAG_DISPLAY_STORAGE, file_service_id, tag_service_id, counts_cache_changes)
def Generate(self, tag_service_id): self.modules_mappings_counts.CreateTables( ClientTags.TAG_DISPLAY_STORAGE, self.modules_services.combined_file_service_id, tag_service_id) # (current_mappings_table_name, deleted_mappings_table_name, pending_mappings_table_name, petitioned_mappings_table_name ) = ClientDBMappingsStorage.GenerateMappingsTableNames(tag_service_id) current_mappings_exist = self._Execute( 'SELECT 1 FROM ' + current_mappings_table_name + ' LIMIT 1;').fetchone() is not None pending_mappings_exist = self._Execute( 'SELECT 1 FROM ' + pending_mappings_table_name + ' LIMIT 1;').fetchone() is not None if current_mappings_exist or pending_mappings_exist: # not worth iterating through all known tags for an empty service for (group_of_ids, num_done, num_to_do) in HydrusDB.ReadLargeIdQueryInSeparateChunks( self._c, 'SELECT tag_id FROM tags;', 10000): # must be a cleverer way of doing this with self._MakeTemporaryIntegerTable( group_of_ids, 'tag_id') as temp_table_name: current_counter = collections.Counter() # temp tags to mappings for (tag_id, count) in self._Execute( 'SELECT tag_id, COUNT( * ) FROM {} CROSS JOIN {} USING ( tag_id ) GROUP BY ( tag_id );' .format(temp_table_name, current_mappings_table_name)): current_counter[tag_id] = count pending_counter = collections.Counter() # temp tags to mappings for (tag_id, count) in self._Execute( 'SELECT tag_id, COUNT( * ) FROM {} CROSS JOIN {} USING ( tag_id ) GROUP BY ( tag_id );' .format(temp_table_name, pending_mappings_table_name)): pending_counter[tag_id] = count all_ids_seen = set(current_counter.keys()) all_ids_seen.update(pending_counter.keys()) counts_cache_changes = [(tag_id, current_counter[tag_id], pending_counter[tag_id]) for tag_id in all_ids_seen] if len(counts_cache_changes) > 0: self.modules_mappings_counts_update.AddCounts( ClientTags.TAG_DISPLAY_STORAGE, self.modules_services.combined_file_service_id, tag_service_id, counts_cache_changes) 
self.modules_mappings_cache_combined_files_display.Generate( tag_service_id)
def AddFiles(self, file_service_id, tag_service_id, hash_ids, hash_ids_table_name):
    """
    Add the given files (already listed in the temp table hash_ids_table_name)
    to this specific storage mappings cache, copying their current, deleted,
    and pending rows from the raw mappings tables and adding autocomplete
    counts for the current/pending rows actually inserted.

    Fix: the deleted-rows copy previously did
    `INSERT ... ( hash_id, tag_id ) SELECT tag_id, hash_id ...`, writing tag
    ids into the hash_id column and vice versa; the SELECT now matches the
    column list, consistent with the other storage-to-cache copies here.
    """

    (cache_current_mappings_table_name, cache_deleted_mappings_table_name, cache_pending_mappings_table_name) = ClientDBMappingsStorage.GenerateSpecificMappingsCacheTableNames(file_service_id, tag_service_id)
    (current_mappings_table_name, deleted_mappings_table_name, pending_mappings_table_name, petitioned_mappings_table_name) = ClientDBMappingsStorage.GenerateMappingsTableNames(tag_service_id)

    # deleted don't have a/c counts to update, so we can do it all in one go here
    self._Execute('INSERT OR IGNORE INTO {} ( hash_id, tag_id ) SELECT hash_id, tag_id FROM {} CROSS JOIN {} USING ( hash_id );'.format(cache_deleted_mappings_table_name, hash_ids_table_name, deleted_mappings_table_name))

    # temp hashes to mappings
    current_mapping_ids_raw = self._Execute('SELECT tag_id, hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.format(hash_ids_table_name, current_mappings_table_name)).fetchall()

    current_mapping_ids_dict = HydrusData.BuildKeyToSetDict(current_mapping_ids_raw)

    # temp hashes to mappings
    pending_mapping_ids_raw = self._Execute('SELECT tag_id, hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.format(hash_ids_table_name, pending_mappings_table_name)).fetchall()

    pending_mapping_ids_dict = HydrusData.BuildKeyToSetDict(pending_mapping_ids_raw)

    all_ids_seen = set(current_mapping_ids_dict.keys())
    all_ids_seen.update(pending_mapping_ids_dict.keys())

    counts_cache_changes = []

    for tag_id in all_ids_seen:

        current_hash_ids = current_mapping_ids_dict[tag_id]

        current_delta = len(current_hash_ids)

        if current_delta > 0:

            self._ExecuteMany('INSERT OR IGNORE INTO ' + cache_current_mappings_table_name + ' ( hash_id, tag_id ) VALUES ( ?, ? );', ((hash_id, tag_id) for hash_id in current_hash_ids))

            # trust the actual insert count over the candidate count
            current_delta = self._GetRowCount()

        #

        pending_hash_ids = pending_mapping_ids_dict[tag_id]

        pending_delta = len(pending_hash_ids)

        if pending_delta > 0:

            self._ExecuteMany('INSERT OR IGNORE INTO ' + cache_pending_mappings_table_name + ' ( hash_id, tag_id ) VALUES ( ?, ? );', ((hash_id, tag_id) for hash_id in pending_hash_ids))

            pending_delta = self._GetRowCount()

        #

        if current_delta > 0 or pending_delta > 0:

            counts_cache_changes.append((tag_id, current_delta, pending_delta))

    if len(counts_cache_changes) > 0:

        self.modules_mappings_counts_update.AddCounts(ClientTags.TAG_DISPLAY_STORAGE, file_service_id, tag_service_id, counts_cache_changes)
def RegeneratePending(self, file_service_id, tag_service_id, status_hook=None):
    """
    Rebuild this specific display cache's pending table and pending counts
    from the specific storage cache's pending rows, expanding each storage tag
    through its display implications.
    """

    (cache_current_mappings_table_name, cache_deleted_mappings_table_name, cache_pending_mappings_table_name) = ClientDBMappingsStorage.GenerateSpecificMappingsCacheTableNames(file_service_id, tag_service_id)
    (cache_display_current_mappings_table_name, cache_display_pending_mappings_table_name) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(file_service_id, tag_service_id)

    if status_hook is not None:

        message = 'clearing old specific display data'
        status_hook(message)

    all_pending_storage_tag_ids = self._STS(self._Execute('SELECT DISTINCT tag_id FROM {};'.format(cache_pending_mappings_table_name)))

    storage_tag_ids_to_display_tag_ids = self.modules_tag_display.GetTagsToImplies(ClientTags.TAG_DISPLAY_ACTUAL, tag_service_id, all_pending_storage_tag_ids)

    all_pending_display_tag_ids = set(itertools.chain.from_iterable(storage_tag_ids_to_display_tag_ids.values()))

    # these can be large; free them before the regen loop
    del all_pending_storage_tag_ids
    del storage_tag_ids_to_display_tag_ids

    # only the pending side of the counts is wiped; current counts survive
    self.modules_mappings_counts.ClearCounts(ClientTags.TAG_DISPLAY_ACTUAL, file_service_id, tag_service_id, keep_current=True)

    self._Execute('DELETE FROM {};'.format(cache_display_pending_mappings_table_name))

    # invert: for each display tag, which storage tags imply it
    all_pending_display_tag_ids_to_implied_by_storage_tag_ids = self.modules_tag_display.GetTagsToImpliedBy(ClientTags.TAG_DISPLAY_ACTUAL, tag_service_id, all_pending_display_tag_ids, tags_are_ideal=True)

    counts_cache_changes = []

    num_to_do = len(all_pending_display_tag_ids_to_implied_by_storage_tag_ids)

    for (i, (display_tag_id, storage_tag_ids)) in enumerate(all_pending_display_tag_ids_to_implied_by_storage_tag_ids.items()):

        if i % 100 == 0 and status_hook is not None:

            message = 'regenerating pending tags {}'.format(HydrusData.ConvertValueRangeToPrettyString(i + 1, num_to_do))
            status_hook(message)

        if len(storage_tag_ids) == 1:

            # single implying tag: a simple WHERE beats a temp-table join
            (storage_tag_id,) = storage_tag_ids

            self._Execute('INSERT OR IGNORE INTO {} ( tag_id, hash_id ) SELECT ?, hash_id FROM {} WHERE tag_id = ?;'.format(cache_display_pending_mappings_table_name, cache_pending_mappings_table_name), (display_tag_id, storage_tag_id))

            pending_delta = self._GetRowCount()

        else:

            with self._MakeTemporaryIntegerTable(storage_tag_ids, 'tag_id') as temp_tag_ids_table_name:

                # temp tags to mappings merged
                self._Execute('INSERT OR IGNORE INTO {} ( tag_id, hash_id ) SELECT DISTINCT ?, hash_id FROM {} CROSS JOIN {} USING ( tag_id );'.format(cache_display_pending_mappings_table_name, temp_tag_ids_table_name, cache_pending_mappings_table_name), (display_tag_id,))

                pending_delta = self._GetRowCount()

        counts_cache_changes.append((display_tag_id, 0, pending_delta))

    self.modules_mappings_counts_update.AddCounts(ClientTags.TAG_DISPLAY_ACTUAL, file_service_id, tag_service_id, counts_cache_changes)
def AddFiles(self, file_service_id, tag_service_id, hash_ids, hash_ids_table_name):
    # Files listed in hash_ids_table_name have just been added to this
    # specific file domain. Project their storage-level current/pending
    # mappings through the tag implication rules into the display-level
    # cache tables, then bump the autocomplete counts accordingly.
    (
        display_current_table_name,
        display_pending_table_name
    ) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(
        file_service_id, tag_service_id)
    (storage_current_table_name, storage_deleted_table_name,
     storage_pending_table_name
     ) = ClientDBMappingsStorage.GenerateSpecificMappingsCacheTableNames(
         file_service_id, tag_service_id)
    select_template = 'SELECT tag_id, hash_id FROM {} CROSS JOIN {} USING ( hash_id );'
    # temp hashes to mappings
    current_pairs = self._Execute(
        select_template.format(hash_ids_table_name,
                               storage_current_table_name)).fetchall()
    tag_ids_to_current_hash_ids = HydrusData.BuildKeyToSetDict(current_pairs)
    # temp hashes to mappings
    pending_pairs = self._Execute(
        select_template.format(hash_ids_table_name,
                               storage_pending_table_name)).fetchall()
    tag_ids_to_pending_hash_ids = HydrusData.BuildKeyToSetDict(pending_pairs)
    seen_storage_tag_ids = set(tag_ids_to_current_hash_ids.keys())
    seen_storage_tag_ids.update(tag_ids_to_pending_hash_ids.keys())
    storage_tag_ids_to_implies = self.modules_tag_display.GetTagsToImplies(
        ClientTags.TAG_DISPLAY_ACTUAL, tag_service_id, seen_storage_tag_ids)
    # invert: display tag -> set of storage tags that imply it
    display_tag_ids_to_impliers = collections.defaultdict(set)
    for (storage_tag_id, implied_display_tag_ids
         ) in storage_tag_ids_to_implies.items():
        for implied_display_tag_id in implied_display_tag_ids:
            display_tag_ids_to_impliers[implied_display_tag_id].add(
                storage_tag_id)
    counts_cache_changes = []
    # for all display tags implied by the existing storage mappings, add them
    # when files are added to a specific domain, every insert here is new
    for (display_tag_id,
         implier_tag_ids) in display_tag_ids_to_impliers.items():
        display_current_hash_ids = set()
        for implier_tag_id in implier_tag_ids:
            display_current_hash_ids.update(
                tag_ids_to_current_hash_ids[implier_tag_id])
        current_delta = len(display_current_hash_ids)
        if current_delta > 0:
            self._ExecuteMany(
                'INSERT OR IGNORE INTO ' + display_current_table_name +
                ' ( hash_id, tag_id ) VALUES ( ?, ? );',
                ((hash_id, display_tag_id)
                 for hash_id in display_current_hash_ids))
        #
        display_pending_hash_ids = set()
        for implier_tag_id in implier_tag_ids:
            display_pending_hash_ids.update(
                tag_ids_to_pending_hash_ids[implier_tag_id])
        pending_delta = len(display_pending_hash_ids)
        if pending_delta > 0:
            self._ExecuteMany(
                'INSERT OR IGNORE INTO ' + display_pending_table_name +
                ' ( hash_id, tag_id ) VALUES ( ?, ? );',
                ((hash_id, display_tag_id)
                 for hash_id in display_pending_hash_ids))
        #
        if current_delta > 0 or pending_delta > 0:
            counts_cache_changes.append(
                (display_tag_id, current_delta, pending_delta))
    if len(counts_cache_changes) > 0:
        self.modules_mappings_counts_update.AddCounts(
            ClientTags.TAG_DISPLAY_ACTUAL, file_service_id, tag_service_id,
            counts_cache_changes)
def RescindPendingMappings(self, file_service_id, tag_service_id, storage_tag_id, hash_ids):
    # A storage-level pending mapping (storage_tag_id on each of hash_ids) is
    # being rescinded in this specific domain. Remove the display-level
    # pending rows it implied -- but only where no *other* storage tag still
    # implies the same display tag on that file -- then reduce the pending
    # autocomplete counts by however many rows were actually deleted.
    (
        cache_display_current_mappings_table_name,
        cache_display_pending_mappings_table_name
    ) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(
        file_service_id, tag_service_id)
    implies_tag_ids = self.modules_tag_display.GetImplies(
        ClientTags.TAG_DISPLAY_ACTUAL, tag_service_id, storage_tag_id)
    # display tag -> all storage tags that imply it
    implies_tag_ids_to_implied_by_tag_ids = self.modules_tag_display.GetTagsToImpliedBy(
        ClientTags.TAG_DISPLAY_ACTUAL, tag_service_id, implies_tag_ids,
        tags_are_ideal=True)
    # display_tag_id -> number of display pending rows actually removed
    ac_counts = collections.Counter()
    for (display_tag_id, implied_by_tag_ids
         ) in implies_tag_ids_to_implied_by_tag_ids.items():
        # for every tag implied by the storage tag being removed
        other_implied_by_tag_ids = set(implied_by_tag_ids)
        other_implied_by_tag_ids.discard(storage_tag_id)
        if len(other_implied_by_tag_ids) == 0:
            # nothing else implies this tag on display, so can just straight up delete
            self._ExecuteMany(
                'DELETE FROM {} WHERE tag_id = ? AND hash_id = ?;'.format(
                    cache_display_pending_mappings_table_name),
                ((display_tag_id, hash_id) for hash_id in hash_ids))
            # rows actually deleted; read before any further write
            num_rescinded = self._GetRowCount()
        else:
            # other things imply this tag on display, so we need to check storage to see what else has it
            statuses_to_table_names = self.modules_mappings_storage.GetFastestStorageMappingTableNames(
                file_service_id, tag_service_id)
            mappings_table_name = statuses_to_table_names[
                HC.CONTENT_STATUS_PENDING]
            with self._MakeTemporaryIntegerTable(
                    other_implied_by_tag_ids, 'tag_id') as temp_table_name:
                # storage mappings to temp other tag ids
                # delete mappings where it shouldn't exist for other reasons
                # NOT EXISTS guards against deleting a row another implier still justifies
                delete = 'DELETE FROM {} WHERE tag_id = ? AND hash_id = ? AND NOT EXISTS ( SELECT 1 FROM {} CROSS JOIN {} USING ( tag_id ) WHERE hash_id = ? )'.format(
                    cache_display_pending_mappings_table_name,
                    mappings_table_name, temp_table_name)
                self._ExecuteMany(delete, ((display_tag_id, hash_id, hash_id)
                                           for hash_id in hash_ids))
                num_rescinded = self._GetRowCount()
        if num_rescinded > 0:
            ac_counts[display_tag_id] += num_rescinded
    if len(ac_counts) > 0:
        # ( tag_id, current_delta, pending_delta ) -- only pending changed here
        counts_cache_changes = [(tag_id, 0, pending_delta)
                                for (tag_id,
                                     pending_delta) in ac_counts.items()]
        self.modules_mappings_counts_update.ReduceCounts(
            ClientTags.TAG_DISPLAY_ACTUAL, file_service_id, tag_service_id,
            counts_cache_changes)
def GetHashIdsFromTagIds( self, tag_display_type: int, file_service_key: bytes, tag_search_context: ClientSearch.TagSearchContext, tag_ids: typing.Collection[ int ], hash_ids = None, hash_ids_table_name = None, job_key = None ):
    # Return the set of hash_ids that carry any of the given tag_ids in the
    # mapping tables selected by (tag_display_type, file domain, search
    # context). If the caller supplies a hash_ids table and it looks cheaper,
    # the search joins against that table instead of scanning mappings.
    # job_key, when supplied, allows mid-query cancellation.
    do_hash_table_join = False
    if hash_ids_table_name is not None and hash_ids is not None:
        tag_service_id = self.modules_services.GetServiceId( tag_search_context.service_key )
        file_service_id = self.modules_services.GetServiceId( file_service_key )
        estimated_count = self.modules_mappings_counts.GetAutocompleteCountEstimate( tag_display_type, tag_service_id, file_service_id, tag_ids, tag_search_context.include_current_tags, tag_search_context.include_pending_tags )
        # join through the file table only when the estimate says it is cheaper
        do_hash_table_join = ClientDBMappingsStorage.DoingAFileJoinTagSearchIsFaster( len( hash_ids ), estimated_count )
    result_hash_ids = set()
    table_names = self.modules_tag_search.GetMappingTables( tag_display_type, file_service_key, tag_search_context )
    cancelled_hook = job_key.IsCancelled if job_key is not None else None
    if len( tag_ids ) == 1:
        ( tag_id, ) = tag_ids
        if do_hash_table_join:
            # temp hashes to mappings
            query_template = 'SELECT hash_id FROM {} CROSS JOIN {} USING ( hash_id ) WHERE tag_id = ?'
            queries = [ query_template.format( hash_ids_table_name, table_name ) for table_name in table_names ]
        else:
            queries = [ 'SELECT hash_id FROM {} WHERE tag_id = ?;'.format( table_name ) for table_name in table_names ]
        for query in queries:
            cursor = self._Execute( query, ( tag_id, ) )
            result_hash_ids.update( self._STI( HydrusDB.ReadFromCancellableCursor( cursor, 1024, cancelled_hook ) ) )
    else:
        with self._MakeTemporaryIntegerTable( tag_ids, 'tag_id' ) as temp_tag_ids_table_name:
            if do_hash_table_join:
                # temp hashes to mappings to temp tags
                # the nested EXISTS form gets sqlite to run a correlated scalar
                # subquery rather than materialising an intermediate list
                query_template = 'SELECT hash_id FROM {} WHERE EXISTS ( SELECT 1 FROM {} WHERE {}.hash_id = {}.hash_id AND EXISTS ( SELECT 1 FROM {} WHERE {}.tag_id = {}.tag_id ) );'
                queries = [
                    query_template.format( hash_ids_table_name, table_name, table_name, hash_ids_table_name, temp_tag_ids_table_name, table_name, temp_tag_ids_table_name )
                    for table_name in table_names
                ]
            else:
                # temp tags to mappings
                queries = [ 'SELECT hash_id FROM {} CROSS JOIN {} USING ( tag_id );'.format( temp_tag_ids_table_name, table_name ) for table_name in table_names ]
            for query in queries:
                cursor = self._Execute( query )
                result_hash_ids.update( self._STI( HydrusDB.ReadFromCancellableCursor( cursor, 1024, cancelled_hook ) ) )
    return result_hash_ids