Example no. 1
0
    def DeleteImplications(self,
                           file_service_id,
                           tag_service_id,
                           implication_tag_ids,
                           tag_id,
                           status_hook=None):
        """
        Remove tag_id's rows from the specific display mappings cache that
        were only justified by the given implication tags.

        For each of the current and pending statuses this deletes display
        mappings for tag_id on hash_ids that have a storage mapping for one
        of implication_tag_ids but no storage mapping for any implication
        that remains afterwards, then reduces the display counts cache by
        the number of rows actually deleted.

        :param file_service_id: id of the file domain whose specific cache is updated
        :param tag_service_id: id of the tag service whose specific cache is updated
        :param implication_tag_ids: storage tag ids whose implication to tag_id is being removed
        :param tag_id: the implied (display) tag whose cached mappings may shrink
        :param status_hook: accepted for interface compatibility; not used in this body
        """

        if len(implication_tag_ids) == 0:

            return

        # per-status tally of display rows actually deleted, consumed by the
        # counts cache update at the end
        statuses_to_count_delta = collections.Counter()

        # specific-domain storage and display cache table names for this
        # (file service, tag service) pair
        (cache_current_mappings_table_name, cache_deleted_mappings_table_name,
         cache_pending_mappings_table_name
         ) = ClientDBMappingsStorage.GenerateSpecificMappingsCacheTableNames(
             file_service_id, tag_service_id)
        (
            cache_display_current_mappings_table_name,
            cache_display_pending_mappings_table_name
        ) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(
            file_service_id, tag_service_id)

        # implications on tag_id that will still exist after this delete; a
        # display row survives when its hash still has one of these tags
        remaining_implication_tag_ids = set(
            self.modules_tag_display.GetImpliedBy(
                ClientTags.TAG_DISPLAY_ACTUAL, tag_service_id,
                tag_id)).difference(implication_tag_ids)

        # current/pending positive tag id lists and total mapping weights for
        # both the removee and the surviving implication sets; the weights
        # drive the query-shape decisions below
        (
            current_implication_tag_ids, current_implication_tag_ids_weight,
            pending_implication_tag_ids, pending_implication_tag_ids_weight
        ) = self.modules_mappings_counts.GetCurrentPendingPositiveCountsAndWeights(
            ClientTags.TAG_DISPLAY_STORAGE, file_service_id, tag_service_id,
            implication_tag_ids)
        (
            current_remaining_implication_tag_ids,
            current_remaining_implication_tag_ids_weight,
            pending_remaining_implication_tag_ids,
            pending_remaining_implication_tag_ids_weight
        ) = self.modules_mappings_counts.GetCurrentPendingPositiveCountsAndWeights(
            ClientTags.TAG_DISPLAY_STORAGE, file_service_id, tag_service_id,
            remaining_implication_tag_ids)

        # one job per mapping status: (status, display table, storage table,
        # removee tag ids, removee weight, keep tag ids, keep weight)
        jobs = []

        jobs.append(
            (HC.CONTENT_STATUS_CURRENT,
             cache_display_current_mappings_table_name,
             cache_current_mappings_table_name, current_implication_tag_ids,
             current_implication_tag_ids_weight,
             current_remaining_implication_tag_ids,
             current_remaining_implication_tag_ids_weight))
        jobs.append(
            (HC.CONTENT_STATUS_PENDING,
             cache_display_pending_mappings_table_name,
             cache_pending_mappings_table_name, pending_implication_tag_ids,
             pending_implication_tag_ids_weight,
             pending_remaining_implication_tag_ids,
             pending_remaining_implication_tag_ids_weight))

        for (status, cache_display_mappings_table_name,
             cache_mappings_table_name, removee_tag_ids,
             removee_tag_ids_weight, keep_tag_ids,
             keep_tag_ids_weight) in jobs:

            if removee_tag_ids_weight == 0:

                # nothing to remove, so nothing to do!

                continue

            # ultimately here, we are doing "delete all display mappings with hash_ids that have a storage mapping for a removee tag and no storage mappings for a keep tag
            # in order to reduce overhead, we go full meme and do a bunch of different situations

            with self._MakeTemporaryIntegerTable(
                [], 'tag_id') as temp_removee_tag_ids_table_name:

                with self._MakeTemporaryIntegerTable(
                    [], 'tag_id') as temp_keep_tag_ids_table_name:

                    # single removee tag: inline its id into the SQL;
                    # otherwise spill the ids into the temp table and join
                    if len(removee_tag_ids) == 1:

                        (removee_tag_id, ) = removee_tag_ids

                        hash_id_in_storage_remove = 'hash_id IN ( SELECT hash_id FROM {} WHERE tag_id = {} )'.format(
                            cache_mappings_table_name, removee_tag_id)

                    else:

                        self._ExecuteMany(
                            'INSERT INTO {} ( tag_id ) VALUES ( ? );'.format(
                                temp_removee_tag_ids_table_name),
                            ((removee_tag_id, )
                             for removee_tag_id in removee_tag_ids))

                        hash_id_in_storage_remove = 'hash_id IN ( SELECT DISTINCT hash_id FROM {} CROSS JOIN {} USING ( tag_id ) )'.format(
                            temp_removee_tag_ids_table_name,
                            cache_mappings_table_name)

                    if keep_tag_ids_weight == 0:

                        # no surviving implications have any mappings, so
                        # every matched hash loses its display row
                        predicates_phrase = hash_id_in_storage_remove

                    else:

                        # WARNING, WARNING: Big Brain Query, potentially great/awful
                        # note that in the 'clever/file join' situation, the number of total mappings is many, but we are deleting a few
                        # we want to precisely scan the status of the potential hashes to delete, not scan through them all to see what not to do
                        # therefore, we do NOT EXISTS, which just scans the parts, rather than NOT IN, which does the whole query and then checks against all results

                        if len(keep_tag_ids) == 1:

                            (keep_tag_id, ) = keep_tag_ids

                            if ClientDBMappingsStorage.DoingAFileJoinTagSearchIsFaster(
                                    removee_tag_ids_weight,
                                    keep_tag_ids_weight):

                                hash_id_not_in_storage_keep = 'NOT EXISTS ( SELECT 1 FROM {} WHERE {}.hash_id = {}.hash_id and tag_id = {} )'.format(
                                    cache_mappings_table_name,
                                    cache_display_mappings_table_name,
                                    cache_mappings_table_name, keep_tag_id)

                            else:

                                hash_id_not_in_storage_keep = 'hash_id NOT IN ( SELECT hash_id FROM {} WHERE tag_id = {} )'.format(
                                    cache_mappings_table_name, keep_tag_id)

                        else:

                            self._ExecuteMany(
                                'INSERT INTO {} ( tag_id ) VALUES ( ? );'.
                                format(temp_keep_tag_ids_table_name),
                                ((keep_tag_id, )
                                 for keep_tag_id in keep_tag_ids))

                            if ClientDBMappingsStorage.DoingAFileJoinTagSearchIsFaster(
                                    removee_tag_ids_weight,
                                    keep_tag_ids_weight):

                                # (files to) mappings to temp tags
                                hash_id_not_in_storage_keep = 'NOT EXISTS ( SELECT 1 FROM {} CROSS JOIN {} USING ( tag_id ) WHERE {}.hash_id = {}.hash_id )'.format(
                                    cache_mappings_table_name,
                                    temp_keep_tag_ids_table_name,
                                    cache_display_mappings_table_name,
                                    cache_mappings_table_name)

                            else:

                                # temp tags to mappings
                                hash_id_not_in_storage_keep = ' hash_id NOT IN ( SELECT DISTINCT hash_id FROM {} CROSS JOIN {} USING ( tag_id ) )'.format(
                                    temp_keep_tag_ids_table_name,
                                    cache_mappings_table_name)

                        predicates_phrase = '{} AND {}'.format(
                            hash_id_in_storage_remove,
                            hash_id_not_in_storage_keep)

                    query = 'DELETE FROM {} WHERE tag_id = {} AND {};'.format(
                        cache_display_mappings_table_name, tag_id,
                        predicates_phrase)

                    self._Execute(query)

                    # rows deleted by the statement above, per status
                    statuses_to_count_delta[status] = self._GetRowCount()

        current_delta = statuses_to_count_delta[HC.CONTENT_STATUS_CURRENT]
        pending_delta = statuses_to_count_delta[HC.CONTENT_STATUS_PENDING]

        if current_delta > 0 or pending_delta > 0:

            counts_cache_changes = ((tag_id, current_delta, pending_delta), )

            # keep the actual-display counts cache in sync with the rows we
            # just deleted
            self.modules_mappings_counts_update.ReduceCounts(
                ClientTags.TAG_DISPLAY_ACTUAL, file_service_id, tag_service_id,
                counts_cache_changes)
Example no. 2
0
 def GetHashIdsFromTagIds( self, tag_display_type: int, file_service_key: bytes, tag_search_context: ClientSearch.TagSearchContext, tag_ids: typing.Collection[ int ], hash_ids = None, hash_ids_table_name = None, job_key = None ):
     """
     Return the set of hash_ids that have at least one of the given tag_ids
     in the mapping tables for this search context.
     
     :param tag_display_type: which display domain's mapping tables to search
     :param file_service_key: key of the file domain to search within
     :param tag_search_context: service key and current/pending inclusion flags
     :param tag_ids: tag ids to match; any match adds the hash_id
     :param hash_ids: optional pre-filtered hash ids; with hash_ids_table_name,
         enables a join against that table when estimated to be faster
     :param hash_ids_table_name: optional temp table holding hash_ids
     :param job_key: optional job whose IsCancelled aborts the reads early
     """
     
     # Decide whether joining the caller's temp hash table into the search is
     # estimated to be cheaper than scanning the mapping tables directly.
     join_via_hash_table = False
     
     if hash_ids is not None and hash_ids_table_name is not None:
         
         tag_service_id = self.modules_services.GetServiceId( tag_search_context.service_key )
         file_service_id = self.modules_services.GetServiceId( file_service_key )
         
         estimated_mapping_count = self.modules_mappings_counts.GetAutocompleteCountEstimate( tag_display_type, tag_service_id, file_service_id, tag_ids, tag_search_context.include_current_tags, tag_search_context.include_pending_tags )
         
         if ClientDBMappingsStorage.DoingAFileJoinTagSearchIsFaster( len( hash_ids ), estimated_mapping_count ):
             
             join_via_hash_table = True
             
         
     
     mapping_table_names = self.modules_tag_search.GetMappingTables( tag_display_type, file_service_key, tag_search_context )
     
     cancel_check = None if job_key is None else job_key.IsCancelled
     
     found_hash_ids = set()
     
     def _drain( cursor ):
         
         # read in cancellable batches so a cancelled job stops early
         found_hash_ids.update( self._STI( HydrusDB.ReadFromCancellableCursor( cursor, 1024, cancel_check ) ) )
         
     
     if len( tag_ids ) == 1:
         
         ( single_tag_id, ) = tag_ids
         
         if join_via_hash_table:
             
             # temp hashes to mappings
             queries = [ 'SELECT hash_id FROM {} CROSS JOIN {} USING ( hash_id ) WHERE tag_id = ?'.format( hash_ids_table_name, table_name ) for table_name in mapping_table_names ]
             
         else:
             
             queries = [ 'SELECT hash_id FROM {} WHERE tag_id = ?;'.format( table_name ) for table_name in mapping_table_names ]
             
         
         for query in queries:
             
             _drain( self._Execute( query, ( single_tag_id, ) ) )
             
         
     else:
         
         with self._MakeTemporaryIntegerTable( tag_ids, 'tag_id' ) as temp_tag_table_name:
             
             if join_via_hash_table:
                 
                 # temp hashes to mappings to temp tags
                 # nested correlated EXISTS subqueries scan only candidate
                 # rows, which sqlite handles better than a materialised list
                 queries = [ 'SELECT hash_id FROM {} WHERE EXISTS ( SELECT 1 FROM {} WHERE {}.hash_id = {}.hash_id AND EXISTS ( SELECT 1 FROM {} WHERE {}.tag_id = {}.tag_id ) );'.format( hash_ids_table_name, table_name, table_name, hash_ids_table_name, temp_tag_table_name, table_name, temp_tag_table_name ) for table_name in mapping_table_names ]
                 
             else:
                 
                 # temp tags to mappings
                 queries = [ 'SELECT hash_id FROM {} CROSS JOIN {} USING ( tag_id );'.format( temp_tag_table_name, table_name ) for table_name in mapping_table_names ]
                 
             
             for query in queries:
                 
                 _drain( self._Execute( query ) )
                 
             
         
     
     return found_hash_ids