def GetTotalSize(self, hash_ids: typing.Collection[int]) -> int:
    """Return the summed on-disk size of the given files, 0 if none are known."""
    if len(hash_ids) == 1:
        # one id: a direct indexed lookup beats building a temp table
        (single_hash_id, ) = hash_ids
        row = self._c.execute(
            'SELECT size FROM files_info WHERE hash_id = ?;',
            (single_hash_id, )).fetchone()
    else:
        # many ids: join through a temporary integer table and let SQL sum
        with HydrusDB.TemporaryIntegerTable(
                self._c, hash_ids, 'hash_id') as temp_hash_ids_table_name:
            row = self._c.execute(
                'SELECT SUM( size ) FROM {} CROSS JOIN files_info USING ( hash_id );'
                .format(temp_hash_ids_table_name)).fetchone()
    if row is None:
        return 0
    (total, ) = row
    return total
def FilterAllPendingHashIds(self, hash_ids, just_these_service_ids=None):
    """Return the subset of hash_ids pending to any specific file service.

    just_these_service_ids, when given, restricts the check to those services.
    """
    if just_these_service_ids is None:
        service_ids = self.modules_services.GetServiceIds(
            HC.SPECIFIC_FILE_SERVICES)
    else:
        service_ids = just_these_service_ids
    pending_hash_ids = set()
    with HydrusDB.TemporaryIntegerTable(
            self._c, hash_ids, 'hash_id') as temp_hash_ids_table_name:
        for service_id in service_ids:
            # temp hashes to this service's pending files
            pending_table = GenerateFilesTableName(
                service_id, HC.CONTENT_STATUS_PENDING)
            query = 'SELECT hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.format(
                temp_hash_ids_table_name, pending_table)
            pending_hash_ids.update(self._STI(self._c.execute(query)))
    return pending_hash_ids
def _PopulateTagIdsToTagsCache( self, tag_ids ):
    """Ensure every id in tag_ids has an entry in self._tag_ids_to_tags_cache.

    Any tag_id with no backing rows in tags/namespaces/subtags is repaired in
    place with a generated 'unknown tag:...' placeholder so callers never miss.
    """
    # cap cache growth: once it is big, keep only the entries this request needs
    if len( self._tag_ids_to_tags_cache ) > 100000:
        if not isinstance( tag_ids, set ):
            tag_ids = set( tag_ids )
        self._tag_ids_to_tags_cache = { tag_id : tag for ( tag_id, tag ) in self._tag_ids_to_tags_cache.items() if tag_id in tag_ids }
    uncached_tag_ids = { tag_id for tag_id in tag_ids if tag_id not in self._tag_ids_to_tags_cache }
    if len( uncached_tag_ids ) > 0:
        if len( uncached_tag_ids ) == 1:
            # single id: indexed lookup, no temp table needed
            ( uncached_tag_id, ) = uncached_tag_ids
            rows = self._c.execute( 'SELECT tag_id, namespace, subtag FROM tags NATURAL JOIN namespaces NATURAL JOIN subtags WHERE tag_id = ?;', ( uncached_tag_id, ) ).fetchall()
        else:
            with HydrusDB.TemporaryIntegerTable( self._c, uncached_tag_ids, 'tag_id' ) as temp_table_name:
                # temp tag_ids to tags to subtags and namespaces
                rows = self._c.execute( 'SELECT tag_id, namespace, subtag FROM {} CROSS JOIN tags USING ( tag_id ) CROSS JOIN subtags USING ( subtag_id ) CROSS JOIN namespaces USING ( namespace_id );'.format( temp_table_name ) ).fetchall()
        uncached_tag_ids_to_tags = { tag_id : HydrusTags.CombineTag( namespace, subtag ) for ( tag_id, namespace, subtag ) in rows }
        if len( uncached_tag_ids_to_tags ) < len( uncached_tag_ids ):
            # some ids had no definition rows--invent placeholder tags and
            # persist them so the database is consistent from now on
            for tag_id in uncached_tag_ids:
                if tag_id not in uncached_tag_ids_to_tags:
                    tag = 'unknown tag:' + HydrusData.GenerateKey().hex()
                    ( namespace, subtag ) = HydrusTags.SplitTag( tag )
                    namespace_id = self.GetNamespaceId( namespace )
                    subtag_id = self.GetSubtagId( subtag )
                    self._c.execute( 'REPLACE INTO tags ( tag_id, namespace_id, subtag_id ) VALUES ( ?, ?, ? );', ( tag_id, namespace_id, subtag_id ) )
                    uncached_tag_ids_to_tags[ tag_id ] = tag
        self._tag_ids_to_tags_cache.update( uncached_tag_ids_to_tags )
def GetHashIdsThatHaveTagsSimpleLocation( self, tag_display_type: int, file_service_key: bytes, tag_search_context: ClientSearch.TagSearchContext, namespace_ids_table_name = None, hash_ids_table_name = None, job_key = None ):
    """Return the set of hash_ids that have at least one tag mapping.

    namespace_ids_table_name / hash_ids_table_name, when given, are temporary
    integer tables that restrict the search to those namespaces / files.
    A cancelled job_key makes this return an empty set.
    """
    mapping_and_tag_table_names = self.modules_tag_search.GetMappingAndTagTables( tag_display_type, file_service_key, tag_search_context )
    # choose the cheapest query shape for the filters we actually have
    if hash_ids_table_name is None:
        if namespace_ids_table_name is None:
            # hellmode
            queries = [ 'SELECT DISTINCT hash_id FROM {};'.format( mappings_table_name ) for ( mappings_table_name, tags_table_name ) in mapping_and_tag_table_names ]
        else:
            # temp namespaces to tags to mappings
            queries = [ 'SELECT DISTINCT hash_id FROM {} CROSS JOIN {} USING ( namespace_id ) CROSS JOIN {} USING ( tag_id );'.format( namespace_ids_table_name, tags_table_name, mappings_table_name ) for ( mappings_table_name, tags_table_name ) in mapping_and_tag_table_names ]
    else:
        if namespace_ids_table_name is None:
            # temp hashes filtered by a correlated EXISTS on each mappings table
            queries = [ 'SELECT hash_id FROM {} WHERE EXISTS ( SELECT 1 FROM {} WHERE {}.hash_id = {}.hash_id );'.format( hash_ids_table_name, mappings_table_name, mappings_table_name, hash_ids_table_name ) for ( mappings_table_name, tags_table_name ) in mapping_and_tag_table_names ]
        else:
            # temp hashes to mappings to tags to temp namespaces
            # this was originally a 'WHERE EXISTS' thing, but doing that on a three way cross join is too complex for that to work well
            # let's hope DISTINCT can save time too
            queries = [ 'SELECT DISTINCT hash_id FROM {} CROSS JOIN {} USING ( hash_id ) CROSS JOIN {} USING ( tag_id ) CROSS JOIN {} USING ( namespace_id );'.format( hash_ids_table_name, mappings_table_name, tags_table_name, namespace_ids_table_name ) for ( mappings_table_name, tags_table_name ) in mapping_and_tag_table_names ]
    cancelled_hook = None
    if job_key is not None:
        cancelled_hook = job_key.IsCancelled
    nonzero_tag_hash_ids = set()
    for query in queries:
        cursor = self._Execute( query )
        # cancellable block-read so long queries can be interrupted between blocks
        nonzero_tag_hash_ids.update( self._STI( HydrusDB.ReadFromCancellableCursor( cursor, 10240, cancelled_hook ) ) )
        if job_key is not None and job_key.IsCancelled():
            return set()
    return nonzero_tag_hash_ids
def RegenerateSearchableSubtagMap(self, file_service_id, tag_service_id, status_hook=None):
    """Rebuild the subtag -> searchable-subtag map for one (file, tag) service pair.

    status_hook, when given, receives a progress message after each chunk.
    """
    fts4_table = self.GetSubtagsFTS4TableName(file_service_id, tag_service_id)
    map_table = self.GetSubtagsSearchableMapTableName(file_service_id, tag_service_id)
    # wipe the old map and rebuild from every subtag currently in the fts4 cache
    self._Execute('DELETE FROM {};'.format(map_table))
    query = 'SELECT docid FROM {};'.format(fts4_table)
    BLOCK_SIZE = 10000
    for (group_of_subtag_ids, num_done, num_to_do) in HydrusDB.ReadLargeIdQueryInSeparateChunks(self._c, query, BLOCK_SIZE):
        for subtag_id in group_of_subtag_ids:
            row = self._Execute(
                'SELECT subtag FROM subtags WHERE subtag_id = ?;',
                (subtag_id, )).fetchone()
            if row is None:
                continue
            (subtag, ) = row
            searchable_subtag = ClientSearch.ConvertSubtagToSearchable(subtag)
            # only subtags whose searchable form differs need a map entry
            if searchable_subtag != subtag:
                searchable_subtag_id = self.modules_tags.GetSubtagId(searchable_subtag)
                self._Execute(
                    'INSERT OR IGNORE INTO {} ( subtag_id, searchable_subtag_id ) VALUES ( ?, ? );'
                    .format(map_table), (subtag_id, searchable_subtag_id))
        message = HydrusData.ConvertValueRangeToPrettyString(num_done, num_to_do)
        HG.client_controller.frame_splash_status.SetSubtext(message)
        if status_hook is not None:
            status_hook(message)
def ClearDeleteRecord(self, service_id, hash_ids):
    """Drop the delete records for the given hashes on one service.

    Returns the number of rows actually removed.
    """
    deleted_table = GenerateFilesTableName(service_id, HC.CONTENT_STATUS_DELETED)
    statement = 'DELETE FROM {} WHERE hash_id = ?;'.format(deleted_table)
    self._c.executemany(statement, ((hash_id, ) for hash_id in hash_ids))
    return HydrusDB.GetRowCount(self._c)
def GetUndeleteRows(self, service_id, hash_ids):
    """Fetch ( hash_id, original_timestamp ) rows from the service's delete record for the given hashes."""
    deleted_table = GenerateFilesTableName(service_id, HC.CONTENT_STATUS_DELETED)
    with HydrusDB.TemporaryIntegerTable(
            self._c, hash_ids, 'hash_id') as temp_hash_ids_table_name:
        # temp hashes to deleted files
        query = 'SELECT hash_id, original_timestamp FROM {} CROSS JOIN {} USING ( hash_id );'.format(
            temp_hash_ids_table_name, deleted_table)
        return self._c.execute(query).fetchall()
def GetCurrentHashIdsToTimestamps(self, service_id, hash_ids):
    """Map each given hash_id that is current on the service to its timestamp."""
    current_table = GenerateFilesTableName(service_id, HC.CONTENT_STATUS_CURRENT)
    with HydrusDB.TemporaryIntegerTable(
            self._c, hash_ids, 'hash_id') as temp_hash_ids_table_name:
        # temp hashes to current files
        query = 'SELECT hash_id, timestamp FROM {} CROSS JOIN {} USING ( hash_id );'.format(
            temp_hash_ids_table_name, current_table)
        return dict(self._c.execute(query))
def RecordDeleteFiles(self, service_id, insert_rows):
    """Add ( hash_id, original_timestamp ) delete records for a service.

    Returns how many records were actually new.
    """
    deleted_table = GenerateFilesTableName(service_id, HC.CONTENT_STATUS_DELETED)
    now = HydrusData.GetNow()
    statement = 'INSERT OR IGNORE INTO {} ( hash_id, timestamp, original_timestamp ) VALUES ( ?, ?, ? );'.format(
        deleted_table)
    self._c.executemany(
        statement,
        ((hash_id, now, original_timestamp)
         for (hash_id, original_timestamp) in insert_rows))
    return HydrusDB.GetRowCount(self._c)
def FilterPendingHashIds(self, service_id, hash_ids):
    """Return the subset of hash_ids pending on this service.

    The combined file service treats everything as pending-eligible, so all
    the given hashes pass straight through for it.
    """
    if service_id == self.modules_services.combined_file_service_id:
        return set(hash_ids)
    with HydrusDB.TemporaryIntegerTable(
            self._c, hash_ids, 'hash_id') as temp_hash_ids_table_name:
        # temp hashes to pending files
        pending_table = GenerateFilesTableName(service_id, HC.CONTENT_STATUS_PENDING)
        query = 'SELECT hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.format(
            temp_hash_ids_table_name, pending_table)
        return self._STS(self._c.execute(query))
def AddFiles(self, service_id, insert_rows):
    """Make ( hash_id, timestamp ) rows current on the service and clear any matching pending rows.

    Returns True if any pending rows were removed.
    """
    (current_table, deleted_table, pending_table,
     petitioned_table) = GenerateFilesTableNames(service_id)
    self._c.executemany(
        'INSERT OR IGNORE INTO {} VALUES ( ?, ? );'.format(current_table),
        ((hash_id, timestamp) for (hash_id, timestamp) in insert_rows))
    # a file that just arrived is no longer pending
    self._c.executemany(
        'DELETE FROM {} WHERE hash_id = ?;'.format(pending_table),
        ((hash_id, ) for (hash_id, timestamp) in insert_rows))
    return HydrusDB.GetRowCount(self._c) > 0
def AssociatePHashes(self, hash_id, phashes):
    """Link the given perceptual hashes to a file; returns the phash_ids used.

    If any mapping is new, the file's cached search distance is reset so the
    similar-files search will revisit it.
    """
    phash_ids = {self._GetPHashId(phash) for phash in phashes}
    self._c.executemany(
        'INSERT OR IGNORE INTO shape_perceptual_hash_map ( phash_id, hash_id ) VALUES ( ?, ? );',
        ((phash_id, hash_id) for phash_id in phash_ids))
    if HydrusDB.GetRowCount(self._c) > 0:
        # new mappings appeared, so any previous search result is stale
        self._c.execute(
            'REPLACE INTO shape_search_cache ( hash_id, searched_distance ) VALUES ( ?, ? );',
            (hash_id, None))
    return phash_ids
def RemoveFiles(self, service_id, hash_ids):
    """Drop the given hashes from the service's current and petitioned tables.

    Returns True if any petitioned rows were removed.
    """
    (current_table, deleted_table, pending_table,
     petitioned_table) = GenerateFilesTableNames(service_id)
    as_params = [(hash_id, ) for hash_id in hash_ids]
    self._c.executemany(
        'DELETE FROM {} WHERE hash_id = ?;'.format(current_table), as_params)
    self._c.executemany(
        'DELETE FROM {} WHERE hash_id = ?;'.format(petitioned_table), as_params)
    return HydrusDB.GetRowCount(self._c) > 0
def _PopulateHashIdsToHashesCache( self, hash_ids ):
    """Ensure every id in hash_ids has an entry in self._hash_ids_to_hashes_cache.

    Tries the fast local_hashes_cache table first, then falls back to the
    master hashes module for anything still missing.
    """
    # cap cache growth: once it is big, keep only the entries this request needs
    if len( self._hash_ids_to_hashes_cache ) > 100000:
        if not isinstance( hash_ids, set ):
            hash_ids = set( hash_ids )
        self._hash_ids_to_hashes_cache = { hash_id : hash for ( hash_id, hash ) in self._hash_ids_to_hashes_cache.items() if hash_id in hash_ids }
    uncached_hash_ids = { hash_id for hash_id in hash_ids if hash_id not in self._hash_ids_to_hashes_cache }
    if len( uncached_hash_ids ) > 0:
        if len( uncached_hash_ids ) == 1:
            ( uncached_hash_id, ) = uncached_hash_ids
            # this makes 0 or 1 rows, so do fetchall rather than fetchone
            local_uncached_hash_ids_to_hashes = { hash_id : hash for ( hash_id, hash ) in self._c.execute( 'SELECT hash_id, hash FROM local_hashes_cache WHERE hash_id = ?;', ( uncached_hash_id, ) ) }
        else:
            with HydrusDB.TemporaryIntegerTable( self._c, uncached_hash_ids, 'hash_id' ) as temp_table_name:
                # temp hash_ids to actual hashes
                local_uncached_hash_ids_to_hashes = { hash_id : hash for ( hash_id, hash ) in self._c.execute( 'SELECT hash_id, hash FROM {} CROSS JOIN local_hashes_cache USING ( hash_id );'.format( temp_table_name ) ) }
        self._hash_ids_to_hashes_cache.update( local_uncached_hash_ids_to_hashes )
        # anything the local cache did not know comes from the master hashes module
        uncached_hash_ids = { hash_id for hash_id in uncached_hash_ids if hash_id not in self._hash_ids_to_hashes_cache }
        if len( uncached_hash_ids ) > 0:
            hash_ids_to_hashes = self.modules_hashes.GetHashIdsToHashes( hash_ids = uncached_hash_ids )
            self._hash_ids_to_hashes_cache.update( hash_ids_to_hashes )
def _PopulateTagIdsToTagsCache( self, tag_ids ):
    """Ensure every id in tag_ids has an entry in self._tag_ids_to_tags_cache.

    Tries the fast local_tags_cache table first, then falls back to the
    master tags module for anything still missing.
    """
    # cap cache growth: once it is big, keep only the entries this request needs
    if len( self._tag_ids_to_tags_cache ) > 100000:
        if not isinstance( tag_ids, set ):
            tag_ids = set( tag_ids )
        self._tag_ids_to_tags_cache = { tag_id : tag for ( tag_id, tag ) in self._tag_ids_to_tags_cache.items() if tag_id in tag_ids }
    uncached_tag_ids = { tag_id for tag_id in tag_ids if tag_id not in self._tag_ids_to_tags_cache }
    if len( uncached_tag_ids ) > 0:
        if len( uncached_tag_ids ) == 1:
            ( uncached_tag_id, ) = uncached_tag_ids
            # this makes 0 or 1 rows, so do fetchall rather than fetchone
            local_uncached_tag_ids_to_tags = { tag_id : tag for ( tag_id, tag ) in self._c.execute( 'SELECT tag_id, tag FROM local_tags_cache WHERE tag_id = ?;', ( uncached_tag_id, ) ) }
        else:
            with HydrusDB.TemporaryIntegerTable( self._c, uncached_tag_ids, 'tag_id' ) as temp_table_name:
                # temp tag_ids to actual tags
                local_uncached_tag_ids_to_tags = { tag_id : tag for ( tag_id, tag ) in self._c.execute( 'SELECT tag_id, tag FROM {} CROSS JOIN local_tags_cache USING ( tag_id );'.format( temp_table_name ) ) }
        self._tag_ids_to_tags_cache.update( local_uncached_tag_ids_to_tags )
        # anything the local cache did not know comes from the master tags module
        uncached_tag_ids = { tag_id for tag_id in uncached_tag_ids if tag_id not in self._tag_ids_to_tags_cache }
        if len( uncached_tag_ids ) > 0:
            tag_ids_to_tags = self.modules_tags.GetTagIdsToTags( tag_ids = uncached_tag_ids )
            self._tag_ids_to_tags_cache.update( tag_ids_to_tags )
def Repopulate(self):
    """Wipe the cache and refill it from the current storage mappings of every real tag service."""
    self.ClearCache()
    tag_service_ids = self.modules_services.GetServiceIds(HC.REAL_TAG_SERVICES)
    queries = []
    for tag_service_id in tag_service_ids:
        queries.append(
            self.modules_mappings_counts.GetQueryPhraseForCurrentTagIds(
                ClientTags.TAG_DISPLAY_STORAGE,
                self.modules_services.combined_local_file_service_id,
                tag_service_id))
    full_query = '{};'.format(' UNION '.join(queries))
    # chunked read keeps memory bounded when there are many tags
    for (block_of_tag_ids, num_done,
         num_to_do) in HydrusDB.ReadLargeIdQueryInSeparateChunks(
             self._c, full_query, 1024):
        self.AddTagIdsToCache(block_of_tag_ids)
def GetNumViewable(self, hash_ids: typing.Collection[int]) -> int:
    """Count how many of the given files have a searchable/viewable mime."""
    if len(hash_ids) == 1:
        # one id: direct indexed lookup
        (single_hash_id, ) = hash_ids
        mimes = self._STL(
            self._c.execute('SELECT mime FROM files_info WHERE hash_id = ?;',
                            (single_hash_id, )))
    else:
        # many ids: join through a temporary integer table
        with HydrusDB.TemporaryIntegerTable(
                self._c, hash_ids, 'hash_id') as temp_hash_ids_table_name:
            mimes = self._STL(
                self._c.execute(
                    'SELECT mime FROM {} CROSS JOIN files_info USING ( hash_id );'
                    .format(temp_hash_ids_table_name)))
    return sum(1 for mime in mimes if mime in HC.SEARCHABLE_MIMES)
def GetServiceIdCounts(self, hash_ids) -> typing.Dict[int, int]:
    """For each specific file service, count how many of the given hashes are current there."""
    service_ids_to_counts = {}
    with HydrusDB.TemporaryIntegerTable(
            self._c, hash_ids, 'hash_id') as temp_hash_ids_table_name:
        for service_id in self.modules_services.GetServiceIds(
                HC.SPECIFIC_FILE_SERVICES):
            current_table = GenerateFilesTableName(service_id,
                                                   HC.CONTENT_STATUS_CURRENT)
            # temp hashes to files
            query = 'SELECT COUNT( * ) FROM {} CROSS JOIN {} USING ( hash_id );'.format(
                temp_hash_ids_table_name, current_table)
            (count, ) = self._c.execute(query).fetchone()
            service_ids_to_counts[service_id] = count
    return service_ids_to_counts
def MaintainTree(self, maintenance_mode=HC.MAINTENANCE_FORCED, job_key=None, stop_time=None):
    """Rebalance flagged branches of the similar-files search tree until none remain or we are told to stop.

    maintenance_mode/stop_time feed the controller's should-stop check; job_key,
    if not supplied, is created here and published as a modal popup after 5s.
    """
    time_started = HydrusData.GetNow()
    pub_job_key = False
    job_key_pubbed = False
    if job_key is None:
        job_key = ClientThreading.JobKey(cancellable=True)
        pub_job_key = True
    try:
        job_key.SetVariable('popup_title', 'similar files metadata maintenance')
        rebalance_phash_ids = self._STL(
            self._c.execute(
                'SELECT phash_id FROM shape_maintenance_branch_regen;'))
        num_to_do = len(rebalance_phash_ids)
        while len(rebalance_phash_ids) > 0:
            # only surface the popup if we created the job key and work is taking a while
            if pub_job_key and not job_key_pubbed and HydrusData.TimeHasPassed(
                    time_started + 5):
                HG.client_controller.pub('modal_message', job_key)
                job_key_pubbed = True
            (i_paused, should_quit) = job_key.WaitIfNeeded()
            should_stop = HG.client_controller.ShouldStopThisWork(
                maintenance_mode, stop_time=stop_time)
            if should_quit or should_stop:
                return
            num_done = num_to_do - len(rebalance_phash_ids)
            text = 'rebalancing similar file metadata - ' + HydrusData.ConvertValueRangeToPrettyString(
                num_done, num_to_do)
            HG.client_controller.frame_splash_status.SetSubtext(text)
            job_key.SetVariable('popup_text_1', text)
            job_key.SetVariable('popup_gauge_1', (num_done, num_to_do))
            with HydrusDB.TemporaryIntegerTable(
                    self._c, rebalance_phash_ids,
                    'phash_id') as temp_table_name:
                # temp phashes to tree
                # regenerating the most populous flagged branch first also covers its flagged descendants
                (biggest_phash_id, ) = self._c.execute(
                    'SELECT phash_id FROM {} CROSS JOIN shape_vptree USING ( phash_id ) ORDER BY inner_population + outer_population DESC;'
                    .format(temp_table_name)).fetchone()
            self._RegenerateBranch(job_key, biggest_phash_id)
            # refresh the worklist--the regen may have cleared several flags at once
            rebalance_phash_ids = self._STL(
                self._c.execute(
                    'SELECT phash_id FROM shape_maintenance_branch_regen;'))
    finally:
        job_key.SetVariable('popup_text_1', 'done!')
        job_key.DeleteVariable('popup_gauge_1')
        job_key.DeleteVariable('popup_text_2')  # used in the regenbranch call
        job_key.Finish()
        job_key.Delete(5)
def Generate(self, tag_service_id): self.modules_mappings_counts.CreateTables( ClientTags.TAG_DISPLAY_STORAGE, self.modules_services.combined_file_service_id, tag_service_id) # (current_mappings_table_name, deleted_mappings_table_name, pending_mappings_table_name, petitioned_mappings_table_name ) = ClientDBMappingsStorage.GenerateMappingsTableNames(tag_service_id) current_mappings_exist = self._Execute( 'SELECT 1 FROM ' + current_mappings_table_name + ' LIMIT 1;').fetchone() is not None pending_mappings_exist = self._Execute( 'SELECT 1 FROM ' + pending_mappings_table_name + ' LIMIT 1;').fetchone() is not None if current_mappings_exist or pending_mappings_exist: # not worth iterating through all known tags for an empty service for (group_of_ids, num_done, num_to_do) in HydrusDB.ReadLargeIdQueryInSeparateChunks( self._c, 'SELECT tag_id FROM tags;', 10000): # must be a cleverer way of doing this with self._MakeTemporaryIntegerTable( group_of_ids, 'tag_id') as temp_table_name: current_counter = collections.Counter() # temp tags to mappings for (tag_id, count) in self._Execute( 'SELECT tag_id, COUNT( * ) FROM {} CROSS JOIN {} USING ( tag_id ) GROUP BY ( tag_id );' .format(temp_table_name, current_mappings_table_name)): current_counter[tag_id] = count pending_counter = collections.Counter() # temp tags to mappings for (tag_id, count) in self._Execute( 'SELECT tag_id, COUNT( * ) FROM {} CROSS JOIN {} USING ( tag_id ) GROUP BY ( tag_id );' .format(temp_table_name, pending_mappings_table_name)): pending_counter[tag_id] = count all_ids_seen = set(current_counter.keys()) all_ids_seen.update(pending_counter.keys()) counts_cache_changes = [(tag_id, current_counter[tag_id], pending_counter[tag_id]) for tag_id in all_ids_seen] if len(counts_cache_changes) > 0: self.modules_mappings_counts_update.AddCounts( ClientTags.TAG_DISPLAY_STORAGE, self.modules_services.combined_file_service_id, tag_service_id, counts_cache_changes) 
self.modules_mappings_cache_combined_files_display.Generate( tag_service_id)
def GetSubtagIdsFromWildcardIntoTable(self, file_service_id: int, tag_service_id: int, subtag_wildcard, subtag_id_table_name, job_key=None):
    """Insert all subtag_ids matching subtag_wildcard into subtag_id_table_name.

    The combined tag service fans out to every real tag service. Wildcards go
    through the FTS4 cache (or a raw LIKE scan when FTS4 cannot help); exact
    subtags go through the searchable-subtag map. A cancelled job_key empties
    the destination table and returns early.
    """
    if tag_service_id == self.modules_services.combined_tag_service_id:
        search_tag_service_ids = self.modules_services.GetServiceIds(
            HC.REAL_TAG_SERVICES)
    else:
        search_tag_service_ids = (tag_service_id, )
    for search_tag_service_id in search_tag_service_ids:
        if '*' in subtag_wildcard:
            subtags_fts4_table_name = self.GetSubtagsFTS4TableName(
                file_service_id, search_tag_service_id)
            wildcard_has_fts4_searchable_characters = WildcardHasFTS4SearchableCharacters(
                subtag_wildcard)
            if subtag_wildcard == '*':
                # hellmode, but shouldn't be called normally
                cursor = self._Execute('SELECT docid FROM {};'.format(
                    subtags_fts4_table_name))
            elif ClientSearch.IsComplexWildcard(
                    subtag_wildcard) or not wildcard_has_fts4_searchable_characters:
                # FTS4 does not support complex wildcards, so instead we'll search our raw subtags
                # however, since we want to search 'searchable' text, we use the 'searchable subtags map' to cross between real and searchable
                like_param = ConvertWildcardToSQLiteLikeParameter(
                    subtag_wildcard)
                if subtag_wildcard.startswith(
                        '*') or not wildcard_has_fts4_searchable_characters:
                    # this is a SCAN, but there we go
                    # a potential optimisation here, in future, is to store fts4 of subtags reversed, then for '*amus', we can just search that reverse cache for 'suma*'
                    # and this would only double the size of the fts4 cache, the largest cache in the whole db! a steal!
                    # it also would not fix '*amu*', but with some cleverness could speed up '*amus ar*'
                    query = 'SELECT docid FROM {} WHERE subtag LIKE ?;'.format(
                        subtags_fts4_table_name)
                    cursor = self._Execute(query, (like_param, ))
                else:
                    # we have an optimisation here--rather than searching all subtags for bl*ah, let's search all the bl* subtags for bl*ah!
                    prefix_fts4_wildcard = subtag_wildcard.split('*')[0]
                    prefix_fts4_wildcard_param = '"{}*"'.format(
                        prefix_fts4_wildcard)
                    query = 'SELECT docid FROM {} WHERE subtag MATCH ? AND subtag LIKE ?;'.format(
                        subtags_fts4_table_name)
                    cursor = self._Execute(
                        query, (prefix_fts4_wildcard_param, like_param))
            else:
                # we want the " " wrapping our search text to keep whitespace words connected and in order
                # "samus ar*" should not match "around samus"
                # simple 'sam*' style subtag, so we can search fts4 no prob
                subtags_fts4_param = '"{}"'.format(subtag_wildcard)
                cursor = self._Execute(
                    'SELECT docid FROM {} WHERE subtag MATCH ?;'.format(
                        subtags_fts4_table_name), (subtags_fts4_param, ))
            cancelled_hook = None
            if job_key is not None:
                cancelled_hook = job_key.IsCancelled
            # fts4 docid doubles as subtag_id, so the rows can be inserted directly
            loop_of_subtag_id_tuples = HydrusDB.ReadFromCancellableCursor(
                cursor, 1024, cancelled_hook=cancelled_hook)
            self._ExecuteMany(
                'INSERT OR IGNORE INTO {} ( subtag_id ) VALUES ( ? );'.format(
                    subtag_id_table_name), loop_of_subtag_id_tuples)
        else:
            # old notes from before we had searchable subtag map. I deleted that map once, albeit in an older and less efficient form. *don't delete it again, it has use*
            #
            # NOTE: doing a subtag = 'blah' lookup on subtags_fts4 tables is ultra slow, lmao!
            # attempts to match '/a/' to 'a' with clever FTS4 MATCHing (i.e. a MATCH on a*\b, then an '= a') proved not super successful
            # in testing, it was still a bit slow. my guess is it is still iterating through all the nodes for ^a*, the \b just makes it a bit more efficient sometimes
            # in tests '^a\b' was about twice as fast as 'a*', so the \b might not even be helping at all
            # so, I decided to move back to a lean and upgraded searchable subtag map, and here we are
            searchable_subtag = subtag_wildcard
            if self.modules_tags.SubtagExists(searchable_subtag):
                searchable_subtag_id = self.modules_tags.GetSubtagId(
                    searchable_subtag)
                self._Execute(
                    'INSERT OR IGNORE INTO {} ( subtag_id ) VALUES ( ? );'.
                    format(subtag_id_table_name), (searchable_subtag_id, ))
                # also pull every raw subtag whose searchable form is this subtag
                subtags_searchable_map_table_name = self.GetSubtagsSearchableMapTableName(
                    file_service_id, search_tag_service_id)
                self._Execute(
                    'INSERT OR IGNORE INTO {} ( subtag_id ) SELECT subtag_id FROM {} WHERE searchable_subtag_id = ?;'
                    .format(subtag_id_table_name,
                            subtags_searchable_map_table_name),
                    (searchable_subtag_id, ))
        if job_key is not None and job_key.IsCancelled():
            # leave no partial results behind on cancel
            self._Execute('DELETE FROM {};'.format(subtag_id_table_name))
            return
def _PopulateHashIdsToHashesCache( self, hash_ids, exception_on_error = False ):
    """Ensure every id in hash_ids has an entry in self._hash_ids_to_hashes_cache.

    A hash_id missing from the master hashes table either raises DataMissing
    (exception_on_error=True) or is papered over with a recognisable dummy
    hash so the client can keep going.
    """
    # cap cache growth: once it is big, keep only the entries this request needs
    if len( self._hash_ids_to_hashes_cache ) > 100000:
        if not isinstance( hash_ids, set ):
            hash_ids = set( hash_ids )
        self._hash_ids_to_hashes_cache = { hash_id : hash for ( hash_id, hash ) in self._hash_ids_to_hashes_cache.items() if hash_id in hash_ids }
    uncached_hash_ids = { hash_id for hash_id in hash_ids if hash_id not in self._hash_ids_to_hashes_cache }
    if len( uncached_hash_ids ) > 0:
        pubbed_error = False
        if len( uncached_hash_ids ) == 1:
            # single id: indexed lookup, no temp table needed
            ( uncached_hash_id, ) = uncached_hash_ids
            rows = self._c.execute( 'SELECT hash_id, hash FROM hashes WHERE hash_id = ?;', ( uncached_hash_id, ) ).fetchall()
        else:
            with HydrusDB.TemporaryIntegerTable( self._c, uncached_hash_ids, 'hash_id' ) as temp_table_name:
                # temp hash_ids to actual hashes
                rows = self._c.execute( 'SELECT hash_id, hash FROM {} CROSS JOIN hashes USING ( hash_id );'.format( temp_table_name ) ).fetchall()
        uncached_hash_ids_to_hashes = dict( rows )
        if len( uncached_hash_ids_to_hashes ) < len( uncached_hash_ids ):
            # orphan hash_ids: master table rows are missing
            for hash_id in uncached_hash_ids:
                if hash_id not in uncached_hash_ids_to_hashes:
                    if exception_on_error:
                        raise HydrusExceptions.DataMissing( 'Did not find all entries for those hash ids!' )
                    HydrusData.DebugPrint( 'Database hash error: hash_id ' + str( hash_id ) + ' was missing!' )
                    HydrusData.PrintException( Exception( 'Missing file identifier stack trace.' ) )
                    if not pubbed_error:
                        # tell the user once per call, not once per orphan
                        HydrusData.ShowText( 'A file identifier was missing! This is a serious error that means your client database has an orphan file id! Think about contacting hydrus dev!' )
                        pubbed_error = True
                    # recognisable 'aaaa...' prefix plus random tail keeps the stand-in unique
                    hash = bytes.fromhex( 'aaaaaaaaaaaaaaaa' ) + os.urandom( 16 )
                    uncached_hash_ids_to_hashes[ hash_id ] = hash
        self._hash_ids_to_hashes_cache.update( uncached_hash_ids_to_hashes )
def ClearLocalDeleteRecord(self, hash_ids=None):
    """Clear deletion records from the local file domains, skipping files still in the trash.

    With hash_ids of None, every eligible record is cleared; otherwise only
    the given hashes are considered. Returns service_id -> rows cleared.
    """
    # we delete from everywhere, but not for files currently in the trash
    service_ids_to_nums_cleared = {}
    local_non_trash_service_ids = self.modules_services.GetServiceIds(
        (HC.COMBINED_LOCAL_FILE, HC.LOCAL_FILE_DOMAIN))
    if hash_ids is None:
        trash_current_files_table_name = GenerateFilesTableName(
            self.modules_services.trash_service_id, HC.CONTENT_STATUS_CURRENT)
        for service_id in local_non_trash_service_ids:
            deleted_files_table_name = GenerateFilesTableName(
                service_id, HC.CONTENT_STATUS_DELETED)
            self._c.execute(
                'DELETE FROM {} WHERE hash_id NOT IN ( SELECT hash_id FROM {} );'
                .format(deleted_files_table_name,
                        trash_current_files_table_name))
            service_ids_to_nums_cleared[service_id] = HydrusDB.GetRowCount(
                self._c)
        self._c.execute(
            'DELETE FROM local_file_deletion_reasons WHERE hash_id NOT IN ( SELECT hash_id FROM {} );'
            .format(trash_current_files_table_name))
    else:
        # anything still trashed keeps its record for now
        trashed_hash_ids = self.FilterCurrentHashIds(
            self.modules_services.trash_service_id, hash_ids)
        ok_to_clear_hash_ids = set(hash_ids).difference(trashed_hash_ids)
        if len(ok_to_clear_hash_ids) > 0:
            clear_params = [(hash_id, ) for hash_id in ok_to_clear_hash_ids]
            for service_id in local_non_trash_service_ids:
                deleted_files_table_name = GenerateFilesTableName(
                    service_id, HC.CONTENT_STATUS_DELETED)
                self._c.executemany(
                    'DELETE FROM {} WHERE hash_id = ?;'.format(
                        deleted_files_table_name), clear_params)
                service_ids_to_nums_cleared[service_id] = HydrusDB.GetRowCount(
                    self._c)
            self._c.executemany(
                'DELETE FROM local_file_deletion_reasons WHERE hash_id = ?;',
                clear_params)
    return service_ids_to_nums_cleared
def GetHashIdsAndNonZeroTagCounts( self, tag_display_type: int, location_context: ClientLocation.LocationContext, tag_search_context: ClientSearch.TagSearchContext, hash_ids, namespace_wildcard = None, job_key = None ):
    """Return ( hash_id, tag count ) rows for the given hashes, counting only matching tags.

    namespace_wildcard, when given, restricts which tags are counted.
    A cancelled job_key returns whatever partial results were gathered.
    """
    if namespace_wildcard == '*':
        namespace_wildcard = None
    if namespace_wildcard is None:
        namespace_ids = []
    else:
        namespace_ids = self.modules_tag_search.GetNamespaceIdsFromWildcard( namespace_wildcard )
    with self._MakeTemporaryIntegerTable( namespace_ids, 'namespace_id' ) as temp_namespace_ids_table_name:
        ( file_service_keys, file_location_is_cross_referenced ) = location_context.GetCoveringCurrentFileServiceKeys()
        mapping_and_tag_table_names = set()
        for file_service_key in file_service_keys:
            mapping_and_tag_table_names.update( self.modules_tag_search.GetMappingAndTagTables( tag_display_type, file_service_key, tag_search_context ) )
        # reason why I (JOIN each table) rather than (join the UNION) is based on previous hell with having query planner figure out a "( a UNION b UNION c ) NATURAL JOIN stuff" situation
        # although the following sometimes makes certifiable 2KB ( 6 UNION * 4-table ) queries, it actually works fast
        # OK, a new problem is mass UNION leads to terrible cancelability because the first row cannot be fetched until the first n - 1 union queries are done
        # I tried some gubbins to try to do a pseudo table-union rather than query union and do 'get files->distinct tag count for this union of tables, and fetch hash_ids first on the union', but did not have luck
        # so NOW we are just going to do it in bits of files mate. this also reduces memory use from the distinct-making UNION with large numbers of hash_ids
        results = []
        BLOCK_SIZE = max( 64, int( len( hash_ids ) ** 0.5 ) ) # go for square root for now
        for group_of_hash_ids in HydrusData.SplitIteratorIntoChunks( hash_ids, BLOCK_SIZE ):
            with self._MakeTemporaryIntegerTable( group_of_hash_ids, 'hash_id' ) as hash_ids_table_name:
                if namespace_wildcard is None:
                    # temp hashes to mappings
                    select_statements = [ 'SELECT hash_id, tag_id FROM {} CROSS JOIN {} USING ( hash_id )'.format( hash_ids_table_name, mappings_table_name ) for ( mappings_table_name, tags_table_name ) in mapping_and_tag_table_names ]
                else:
                    # temp hashes to mappings to tags to namespaces
                    select_statements = [ 'SELECT hash_id, tag_id FROM {} CROSS JOIN {} USING ( hash_id ) CROSS JOIN {} USING ( tag_id ) CROSS JOIN {} USING ( namespace_id )'.format( hash_ids_table_name, mappings_table_name, tags_table_name, temp_namespace_ids_table_name ) for ( mappings_table_name, tags_table_name ) in mapping_and_tag_table_names ]
                unions = '( {} )'.format( ' UNION '.join( select_statements ) )
                query = 'SELECT hash_id, COUNT( tag_id ) FROM {} GROUP BY hash_id;'.format( unions )
                cursor = self._Execute( query )
                cancelled_hook = None
                if job_key is not None:
                    cancelled_hook = job_key.IsCancelled
                loop_of_results = HydrusDB.ReadFromCancellableCursor( cursor, 64, cancelled_hook = cancelled_hook )
                if job_key is not None and job_key.IsCancelled():
                    # hand back what we have so far rather than discarding it
                    return results
                results.extend( loop_of_results )
    return results
def _RegenerateBranch(self, job_key, phash_id): job_key.SetVariable('popup_text_2', 'reviewing existing branch') # grab everything in the branch (parent_id, ) = self._c.execute( 'SELECT parent_id FROM shape_vptree WHERE phash_id = ?;', (phash_id, )).fetchone() cte_table_name = 'branch ( branch_phash_id )' initial_select = 'SELECT ?' recursive_select = 'SELECT phash_id FROM shape_vptree, branch ON parent_id = branch_phash_id' with_clause = 'WITH RECURSIVE ' + cte_table_name + ' AS ( ' + initial_select + ' UNION ALL ' + recursive_select + ')' unbalanced_nodes = self._c.execute( with_clause + ' SELECT branch_phash_id, phash FROM branch, shape_perceptual_hashes ON phash_id = branch_phash_id;', (phash_id, )).fetchall() # removal of old branch, maintenance schedule, and orphan phashes job_key.SetVariable( 'popup_text_2', HydrusData.ToHumanInt(len(unbalanced_nodes)) + ' leaves found--now clearing out old branch') unbalanced_phash_ids = {p_id for (p_id, p_h) in unbalanced_nodes} self._c.executemany('DELETE FROM shape_vptree WHERE phash_id = ?;', ((p_id, ) for p_id in unbalanced_phash_ids)) self._c.executemany( 'DELETE FROM shape_maintenance_branch_regen WHERE phash_id = ?;', ((p_id, ) for p_id in unbalanced_phash_ids)) with HydrusDB.TemporaryIntegerTable( self._c, unbalanced_phash_ids, 'phash_id') as temp_phash_ids_table_name: useful_phash_ids = self._STS( self._c.execute( 'SELECT phash_id FROM {} CROSS JOIN shape_perceptual_hash_map USING ( phash_id );' .format(temp_phash_ids_table_name))) orphan_phash_ids = unbalanced_phash_ids.difference(useful_phash_ids) self._c.executemany( 'DELETE FROM shape_perceptual_hashes WHERE phash_id = ?;', ((p_id, ) for p_id in orphan_phash_ids)) useful_nodes = [ row for row in unbalanced_nodes if row[0] in useful_phash_ids ] useful_population = len(useful_nodes) # now create the new branch, starting by choosing a new root and updating the parent's left/right reference to that if useful_population > 0: (new_phash_id, new_phash) = 
self._PopBestRootNode( useful_nodes) #HydrusData.RandomPop( useful_nodes ) else: new_phash_id = None if parent_id is not None: (parent_inner_id, ) = self._c.execute( 'SELECT inner_id FROM shape_vptree WHERE phash_id = ?;', (parent_id, )).fetchone() if parent_inner_id == phash_id: query = 'UPDATE shape_vptree SET inner_id = ?, inner_population = ? WHERE phash_id = ?;' else: query = 'UPDATE shape_vptree SET outer_id = ?, outer_population = ? WHERE phash_id = ?;' self._c.execute(query, (new_phash_id, useful_population, parent_id)) if useful_population > 0: self._GenerateBranch(job_key, parent_id, new_phash_id, new_phash, useful_nodes)
def Search(self, hash_id, max_hamming_distance):
    """Find files perceptually similar to the given file.

    For distance 0 this is a simple exact-phash lookup; otherwise it walks
    the VP-tree, pruning subtrees whose metric spheres cannot intersect the
    search sphere.

    :param hash_id: the file to search around
    :param max_hamming_distance: maximum 64-bit hamming distance to accept
    :return: list of ( similar_hash_id, distance ) pairs; empty when the
        tree has no root or the file has no phashes
    """

    if max_hamming_distance == 0:

        # distance 0 means 'same phash'--no tree walk needed
        similar_hash_ids = self._STL(
            self._c.execute(
                'SELECT hash_id FROM shape_perceptual_hash_map WHERE phash_id IN ( SELECT phash_id FROM shape_perceptual_hash_map WHERE hash_id = ? );',
                (hash_id, )))

        similar_hash_ids_and_distances = [
            (similar_hash_id, 0) for similar_hash_id in similar_hash_ids
        ]

    else:

        search_radius = max_hamming_distance

        # the root is the single node with no parent
        top_node_result = self._c.execute(
            'SELECT phash_id FROM shape_vptree WHERE parent_id IS NULL;'
        ).fetchone()

        if top_node_result is None:

            return []

        (root_node_phash_id, ) = top_node_result

        # a file can have several phashes; we search around each of them
        search = self._STL(
            self._c.execute(
                'SELECT phash FROM shape_perceptual_hashes NATURAL JOIN shape_perceptual_hash_map WHERE hash_id = ?;',
                (hash_id, )))

        if len(search) == 0:

            return []

        similar_phash_ids_to_distances = {}

        num_cycles = 0
        total_nodes_searched = 0

        for search_phash in search:

            # breadth-first frontier walk from the root
            next_potentials = [root_node_phash_id]

            while len(next_potentials) > 0:

                current_potentials = next_potentials
                next_potentials = []

                num_cycles += 1
                total_nodes_searched += len(current_potentials)

                for group_of_current_potentials in HydrusData.SplitListIntoChunks(
                        current_potentials, 10000):

                    # this is split into fixed lists of results of subgroups because as an iterable it was causing crashes on linux!!
                    # after investigation, it seemed to be SQLite having a problem with part of Get64BitHammingDistance touching phashes it presumably was still hanging on to
                    # the crash was in sqlite code, again presumably on subsequent fetch
                    # adding a delay in seemed to fix it as well. guess it was some memory maintenance buffer/bytes thing
                    # anyway, we now just get the whole lot of results first and then work on the whole lot
                    '''
                    #old method
                    select_statement = 'SELECT phash_id, phash, radius, inner_id, outer_id FROM shape_perceptual_hashes NATURAL JOIN shape_vptree WHERE phash_id = ?;'
                    
                    results = list( self._ExecuteManySelectSingleParam( select_statement, group_of_current_potentials ) )
                    '''

                    with HydrusDB.TemporaryIntegerTable(
                            self._c, group_of_current_potentials,
                            'phash_id') as temp_table_name:

                        # temp phash_ids to actual phashes and tree info
                        results = self._c.execute(
                            'SELECT phash_id, phash, radius, inner_id, outer_id FROM {} CROSS JOIN shape_perceptual_hashes USING ( phash_id ) CROSS JOIN shape_vptree USING ( phash_id );'
                            .format(temp_table_name)).fetchall()

                    for (node_phash_id, node_phash, node_radius,
                         inner_phash_id, outer_phash_id) in results:

                        # first check the node itself--is it similar?

                        node_hamming_distance = HydrusData.Get64BitHammingDistance(
                            search_phash, node_phash)

                        if node_hamming_distance <= search_radius:

                            # keep the smallest distance seen across all of this file's phashes
                            if node_phash_id in similar_phash_ids_to_distances:

                                current_distance = similar_phash_ids_to_distances[
                                    node_phash_id]

                                similar_phash_ids_to_distances[
                                    node_phash_id] = min(
                                        node_hamming_distance,
                                        current_distance)

                            else:

                                similar_phash_ids_to_distances[
                                    node_phash_id] = node_hamming_distance

                        # now how about its children?

                        # node_radius is NULL for leaf nodes, so leaves end the walk here
                        if node_radius is not None:

                            # we have two spheres--node and search--their centers separated by node_hamming_distance
                            # we want to search inside/outside the node_sphere if the search_sphere intersects with those spaces
                            # there are four possibles:
                            # (----N----)-(--S--) intersects with outer only - distance between N and S > their radii
                            # (----N---(-)-S--) intersects with both
                            # (----N-(--S-)-) intersects with both
                            # (---(-N-S--)-) intersects with inner only - distance between N and S + radius_S does not exceed radius_N

                            if inner_phash_id is not None:

                                spheres_disjoint = node_hamming_distance > (
                                    node_radius + search_radius)

                                if not spheres_disjoint:  # i.e. they intersect at some point

                                    next_potentials.append(inner_phash_id)

                            if outer_phash_id is not None:

                                search_sphere_subset_of_node_sphere = (
                                    node_hamming_distance +
                                    search_radius) <= node_radius

                                if not search_sphere_subset_of_node_sphere:  # i.e. search sphere intersects with non-node sphere space at some point

                                    next_potentials.append(outer_phash_id)

        if HG.db_report_mode:

            HydrusData.ShowText(
                'Similar file search touched {} nodes over {} cycles.'.
                format(HydrusData.ToHumanInt(total_nodes_searched),
                       HydrusData.ToHumanInt(num_cycles)))

        # so, now we have phash_ids and distances. let's map that to actual files.
        # files can have multiple phashes, and phashes can refer to multiple files, so let's make sure we are setting the smallest distance we found

        similar_phash_ids = list(similar_phash_ids_to_distances.keys())

        with HydrusDB.TemporaryIntegerTable(self._c, similar_phash_ids,
                                            'phash_id') as temp_table_name:

            # temp phashes to hash map
            similar_phash_ids_to_hash_ids = HydrusData.BuildKeyToListDict(
                self._c.execute(
                    'SELECT phash_id, hash_id FROM {} CROSS JOIN shape_perceptual_hash_map USING ( phash_id );'
                    .format(temp_table_name)))

        similar_hash_ids_to_distances = {}

        for (phash_id, hash_ids) in similar_phash_ids_to_hash_ids.items():

            distance = similar_phash_ids_to_distances[phash_id]

            for hash_id in hash_ids:

                # again keep the minimum: a file may be reached via several phashes
                if hash_id not in similar_hash_ids_to_distances:

                    similar_hash_ids_to_distances[hash_id] = distance

                else:

                    current_distance = similar_hash_ids_to_distances[
                        hash_id]

                    if distance < current_distance:

                        similar_hash_ids_to_distances[hash_id] = distance

        similar_hash_ids_and_distances = list(
            similar_hash_ids_to_distances.items())

    return similar_hash_ids_and_distances
def GetHashIdsFromTagIds( self, tag_display_type: int, file_service_key: bytes, tag_search_context: ClientSearch.TagSearchContext, tag_ids: typing.Collection[ int ], hash_ids = None, hash_ids_table_name = None, job_key = None ):
    """Collect every hash_id mapped to any of the given tag_ids.

    Queries each relevant mappings table for the tag service/display type.
    When a candidate hash_ids table is supplied and the planner estimate says
    joining against it is cheaper, the mappings lookup is constrained to
    those hashes; otherwise the mappings tables are scanned by tag directly.

    :param tag_display_type: storage vs display mappings selector
    :param file_service_key: file domain to search within
    :param tag_search_context: tag service plus current/pending inclusion flags
    :param tag_ids: tag ids to match
    :param hash_ids: optional pre-filtered candidate hashes
    :param hash_ids_table_name: optional temp table holding those candidates
    :param job_key: optional job whose cancellation aborts the reads early
    :return: set of matching hash_ids
    """

    join_on_hash_table = False

    if hash_ids_table_name is not None and hash_ids is not None:

        tag_service_id = self.modules_services.GetServiceId( tag_search_context.service_key )
        file_service_id = self.modules_services.GetServiceId( file_service_key )

        estimated_count = self.modules_mappings_counts.GetAutocompleteCountEstimate( tag_display_type, tag_service_id, file_service_id, tag_ids, tag_search_context.include_current_tags, tag_search_context.include_pending_tags )

        # joining on the candidate hashes only pays off when the tag side is expected to be big
        join_on_hash_table = ClientDBMappingsStorage.DoingAFileJoinTagSearchIsFaster( len( hash_ids ), estimated_count )

    table_names = self.modules_tag_search.GetMappingTables( tag_display_type, file_service_key, tag_search_context )

    cancelled_hook = job_key.IsCancelled if job_key is not None else None

    result_hash_ids = set()

    def drain_into_results( cursor ):

        # read in cancellable batches and fold the hash_ids into the result set
        result_hash_ids.update( self._STI( HydrusDB.ReadFromCancellableCursor( cursor, 1024, cancelled_hook ) ) )

    if len( tag_ids ) == 1:

        ( tag_id, ) = tag_ids

        if join_on_hash_table:

            # temp hashes to mappings
            select_statements = [ 'SELECT hash_id FROM {} CROSS JOIN {} USING ( hash_id ) WHERE tag_id = ?'.format( hash_ids_table_name, mappings_table_name ) for mappings_table_name in table_names ]

        else:

            select_statements = [ 'SELECT hash_id FROM {} WHERE tag_id = ?;'.format( mappings_table_name ) for mappings_table_name in table_names ]

        for select_statement in select_statements:

            drain_into_results( self._Execute( select_statement, ( tag_id, ) ) )

    else:

        with self._MakeTemporaryIntegerTable( tag_ids, 'tag_id' ) as temp_tag_ids_table_name:

            if join_on_hash_table:

                # temp hashes to mappings to temp tags
                # this nested-EXISTS shape coaxes sqlite into a correlated scalar subquery
                # rather than materialising a tag_id list per hash
                select_statements = [ 'SELECT hash_id FROM {} WHERE EXISTS ( SELECT 1 FROM {} WHERE {}.hash_id = {}.hash_id AND EXISTS ( SELECT 1 FROM {} WHERE {}.tag_id = {}.tag_id ) );'.format( hash_ids_table_name, mappings_table_name, mappings_table_name, hash_ids_table_name, temp_tag_ids_table_name, mappings_table_name, temp_tag_ids_table_name ) for mappings_table_name in table_names ]

            else:

                # temp tags to mappings
                select_statements = [ 'SELECT hash_id FROM {} CROSS JOIN {} USING ( tag_id );'.format( temp_tag_ids_table_name, mappings_table_name ) for mappings_table_name in table_names ]

            for select_statement in select_statements:

                drain_into_results( self._Execute( select_statement ) )

    return result_hash_ids