def FilterAllPendingHashIds(self, hash_ids, just_these_service_ids=None):
    """Return the subset of hash_ids that are pending on any specific file service.
    
    If just_these_service_ids is provided, only those services are consulted;
    otherwise every specific file service is checked.
    """
    
    if just_these_service_ids is not None:
        
        service_ids_to_check = just_these_service_ids
        
    else:
        
        service_ids_to_check = self.modules_services.GetServiceIds(HC.SPECIFIC_FILE_SERVICES)
        
    
    all_pending = set()
    
    with HydrusDB.TemporaryIntegerTable(self._c, hash_ids, 'hash_id') as temp_hash_ids_table_name:
        
        for service_id in service_ids_to_check:
            
            pending_table = GenerateFilesTableName(service_id, HC.CONTENT_STATUS_PENDING)
            
            # temp hash ids against this service's pending files table
            query = 'SELECT hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.format(temp_hash_ids_table_name, pending_table)
            
            all_pending.update(self._STI(self._c.execute(query)))
            
        
    
    return all_pending
def GetTotalSize(self, hash_ids: typing.Collection[int]) -> int:
    """Return the summed size in bytes of the given files.
    
    Hash ids with no files_info row contribute nothing. Always returns an
    int--if nothing matches, the result is 0, never None.
    """
    
    if len(hash_ids) == 1:
        
        (hash_id, ) = hash_ids
        
        # fetchone is None here if the hash has no files_info row
        result = self._c.execute(
            'SELECT size FROM files_info WHERE hash_id = ?;',
            (hash_id, )).fetchone()
        
    else:
        
        with HydrusDB.TemporaryIntegerTable(
                self._c, hash_ids, 'hash_id') as temp_hash_ids_table_name:
            
            result = self._c.execute(
                'SELECT SUM( size ) FROM {} CROSS JOIN files_info USING ( hash_id );'
                .format(temp_hash_ids_table_name)).fetchone()
            
        
    
    if result is None:
        
        return 0
        
    
    (total_size, ) = result
    
    # BUGFIX: SUM() over zero matching rows yields a single ( NULL, ) row, so
    # the aggregate path used to return None here despite the -> int contract
    if total_size is None:
        
        return 0
        
    
    return total_size
def _PopulateTagIdsToTagsCache( self, tag_ids ):
    """Ensure every id in tag_ids has an entry in self._tag_ids_to_tags_cache.
    
    Looks tags up from the master tags/namespaces/subtags tables. Any tag_id
    that has no row there is treated as a damaged record: a placeholder
    'unknown tag:<random hex>' is written back into the tags table via
    REPLACE INTO, so this method can have DB write side effects.
    """
    
    # cap cache memory: once it grows past 100k entries, keep only the ids
    # being requested right now
    if len( self._tag_ids_to_tags_cache ) > 100000:
        
        if not isinstance( tag_ids, set ):
            
            tag_ids = set( tag_ids )
            
        
        self._tag_ids_to_tags_cache = { tag_id : tag for ( tag_id, tag ) in self._tag_ids_to_tags_cache.items() if tag_id in tag_ids }
        
    
    uncached_tag_ids = { tag_id for tag_id in tag_ids if tag_id not in self._tag_ids_to_tags_cache }
    
    if len( uncached_tag_ids ) > 0:
        
        if len( uncached_tag_ids ) == 1:
            
            ( uncached_tag_id, ) = uncached_tag_ids
            
            rows = self._c.execute( 'SELECT tag_id, namespace, subtag FROM tags NATURAL JOIN namespaces NATURAL JOIN subtags WHERE tag_id = ?;', ( uncached_tag_id, ) ).fetchall()
            
        else:
            
            with HydrusDB.TemporaryIntegerTable( self._c, uncached_tag_ids, 'tag_id' ) as temp_table_name:
                
                # temp tag_ids to tags to subtags and namespaces
                rows = self._c.execute( 'SELECT tag_id, namespace, subtag FROM {} CROSS JOIN tags USING ( tag_id ) CROSS JOIN subtags USING ( subtag_id ) CROSS JOIN namespaces USING ( namespace_id );'.format( temp_table_name ) ).fetchall()
                
            
        
        uncached_tag_ids_to_tags = { tag_id : HydrusTags.CombineTag( namespace, subtag ) for ( tag_id, namespace, subtag ) in rows }
        
        # fewer rows than requested ids means some tag_ids are orphans with no
        # master row--repair each one with a recognisable placeholder tag
        if len( uncached_tag_ids_to_tags ) < len( uncached_tag_ids ):
            
            for tag_id in uncached_tag_ids:
                
                if tag_id not in uncached_tag_ids_to_tags:
                    
                    tag = 'unknown tag:' + HydrusData.GenerateKey().hex()
                    
                    ( namespace, subtag ) = HydrusTags.SplitTag( tag )
                    
                    namespace_id = self.GetNamespaceId( namespace )
                    subtag_id = self.GetSubtagId( subtag )
                    
                    # write the placeholder back so future lookups succeed
                    self._c.execute( 'REPLACE INTO tags ( tag_id, namespace_id, subtag_id ) VALUES ( ?, ?, ? );', ( tag_id, namespace_id, subtag_id ) )
                    
                    uncached_tag_ids_to_tags[ tag_id ] = tag
                    
                
            
        
        self._tag_ids_to_tags_cache.update( uncached_tag_ids_to_tags )
def GetUndeleteRows(self, service_id, hash_ids):
    """Fetch ( hash_id, original_timestamp ) for each of the given hash_ids
    currently sitting in the service's deleted files table.
    """
    
    deleted_table = GenerateFilesTableName(service_id, HC.CONTENT_STATUS_DELETED)
    
    with HydrusDB.TemporaryIntegerTable(self._c, hash_ids, 'hash_id') as temp_table:
        
        # temp hash ids to the service's deleted files table
        query = 'SELECT hash_id, original_timestamp FROM {} CROSS JOIN {} USING ( hash_id );'.format(temp_table, deleted_table)
        
        return self._c.execute(query).fetchall()
def GetCurrentHashIdsToTimestamps(self, service_id, hash_ids):
    """Map each of the given hash_ids that is current on the service to its
    timestamp. Hash ids not current on the service are simply absent.
    """
    
    current_table = GenerateFilesTableName(service_id, HC.CONTENT_STATUS_CURRENT)
    
    with HydrusDB.TemporaryIntegerTable(self._c, hash_ids, 'hash_id') as temp_table:
        
        # temp hash ids to the service's current files table
        query = 'SELECT hash_id, timestamp FROM {} CROSS JOIN {} USING ( hash_id );'.format(temp_table, current_table)
        
        return dict(self._c.execute(query))
def FilterPendingHashIds(self, service_id, hash_ids):
    """Return the subset of hash_ids pending on the given service, as a set.
    
    For the combined file service the entire input is returned.
    """
    
    if service_id == self.modules_services.combined_file_service_id:
        
        return set(hash_ids)
        
    
    pending_table = GenerateFilesTableName(service_id, HC.CONTENT_STATUS_PENDING)
    
    with HydrusDB.TemporaryIntegerTable(self._c, hash_ids, 'hash_id') as temp_table:
        
        # temp hash ids to the service's pending files table
        query = 'SELECT hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.format(temp_table, pending_table)
        
        return self._STS(self._c.execute(query))
def _PopulateHashIdsToHashesCache(self, hash_ids):
    """Ensure every id in hash_ids has an entry in self._hash_ids_to_hashes_cache.
    
    Reads the fast local_hashes_cache table first, then falls back to the
    master hashes module for anything still missing. An oversized cache is
    first trimmed down to just the ids requested here.
    """
    
    if len(self._hash_ids_to_hashes_cache) > 100000:
        
        if not isinstance(hash_ids, set):
            
            hash_ids = set(hash_ids)
            
        
        # cap memory: keep only the entries we are about to serve
        self._hash_ids_to_hashes_cache = {h_id: h for (h_id, h) in self._hash_ids_to_hashes_cache.items() if h_id in hash_ids}
        
    
    missing = {h_id for h_id in hash_ids if h_id not in self._hash_ids_to_hashes_cache}
    
    if len(missing) == 0:
        
        return
        
    
    if len(missing) == 1:
        
        (missing_id, ) = missing
        
        # this makes 0 or 1 rows, so a dict over the cursor is fine
        found = dict(self._c.execute('SELECT hash_id, hash FROM local_hashes_cache WHERE hash_id = ?;', (missing_id, )))
        
    else:
        
        with HydrusDB.TemporaryIntegerTable(self._c, missing, 'hash_id') as temp_table_name:
            
            # temp hash_ids to actual hashes
            found = dict(self._c.execute('SELECT hash_id, hash FROM {} CROSS JOIN local_hashes_cache USING ( hash_id );'.format(temp_table_name)))
            
        
    
    self._hash_ids_to_hashes_cache.update(found)
    
    still_missing = {h_id for h_id in missing if h_id not in self._hash_ids_to_hashes_cache}
    
    if len(still_missing) > 0:
        
        # not in the local cache table--ask the definitive hashes module
        self._hash_ids_to_hashes_cache.update(self.modules_hashes.GetHashIdsToHashes(hash_ids=still_missing))
def _PopulateTagIdsToTagsCache(self, tag_ids):
    """Ensure every id in tag_ids has an entry in self._tag_ids_to_tags_cache.
    
    Reads the fast local_tags_cache table first, then falls back to the
    master tags module for anything still missing. An oversized cache is
    first trimmed down to just the ids requested here.
    """
    
    if len(self._tag_ids_to_tags_cache) > 100000:
        
        if not isinstance(tag_ids, set):
            
            tag_ids = set(tag_ids)
            
        
        # cap memory: keep only the entries we are about to serve
        self._tag_ids_to_tags_cache = {t_id: t for (t_id, t) in self._tag_ids_to_tags_cache.items() if t_id in tag_ids}
        
    
    missing = {t_id for t_id in tag_ids if t_id not in self._tag_ids_to_tags_cache}
    
    if len(missing) == 0:
        
        return
        
    
    if len(missing) == 1:
        
        (missing_id, ) = missing
        
        # this makes 0 or 1 rows, so a dict over the cursor is fine
        found = dict(self._c.execute('SELECT tag_id, tag FROM local_tags_cache WHERE tag_id = ?;', (missing_id, )))
        
    else:
        
        with HydrusDB.TemporaryIntegerTable(self._c, missing, 'tag_id') as temp_table_name:
            
            # temp tag_ids to actual tags
            found = dict(self._c.execute('SELECT tag_id, tag FROM {} CROSS JOIN local_tags_cache USING ( tag_id );'.format(temp_table_name)))
            
        
    
    self._tag_ids_to_tags_cache.update(found)
    
    still_missing = {t_id for t_id in missing if t_id not in self._tag_ids_to_tags_cache}
    
    if len(still_missing) > 0:
        
        # not in the local cache table--ask the definitive tags module
        self._tag_ids_to_tags_cache.update(self.modules_tags.GetTagIdsToTags(tag_ids=still_missing))
def GetServiceIdCounts(self, hash_ids) -> typing.Dict[int, int]:
    """For every specific file service, count how many of the given hash_ids
    are current on it. Returns { service_id : count }.
    """
    
    counts = {}
    
    with HydrusDB.TemporaryIntegerTable(self._c, hash_ids, 'hash_id') as temp_hash_ids_table_name:
        
        for service_id in self.modules_services.GetServiceIds(HC.SPECIFIC_FILE_SERVICES):
            
            current_table = GenerateFilesTableName(service_id, HC.CONTENT_STATUS_CURRENT)
            
            # temp hashes to this service's current files
            query = 'SELECT COUNT( * ) FROM {} CROSS JOIN {} USING ( hash_id );'.format(temp_hash_ids_table_name, current_table)
            
            (count, ) = self._c.execute(query).fetchone()
            
            counts[service_id] = count
            
        
    
    return counts
def GetNumViewable(self, hash_ids: typing.Collection[int]) -> int:
    """Count how many of the given hash_ids have a mime in HC.SEARCHABLE_MIMES."""
    
    if len(hash_ids) == 1:
        
        (hash_id, ) = hash_ids
        
        mimes = self._STL(self._c.execute('SELECT mime FROM files_info WHERE hash_id = ?;', (hash_id, )))
        
    else:
        
        with HydrusDB.TemporaryIntegerTable(self._c, hash_ids, 'hash_id') as temp_hash_ids_table_name:
            
            # temp hash ids to file info
            query = 'SELECT mime FROM {} CROSS JOIN files_info USING ( hash_id );'.format(temp_hash_ids_table_name)
            
            mimes = self._STL(self._c.execute(query))
            
        
    
    viewable = [mime for mime in mimes if mime in HC.SEARCHABLE_MIMES]
    
    return len(viewable)
def Search(self, hash_id, max_hamming_distance):
    """Find files perceptually similar to the given file.
    
    Returns a list of ( hash_id, hamming_distance ) pairs within
    max_hamming_distance of any of the file's perceptual hashes. A distance
    of 0 is an exact phash match and is answered with a direct lookup;
    otherwise the shape_vptree is walked breadth-first, pruning subtrees
    whose bounding spheres cannot intersect the search sphere.
    """
    
    if max_hamming_distance == 0:
        
        # exact match: any file sharing one of this file's phashes
        similar_hash_ids = self._STL( self._c.execute( 'SELECT hash_id FROM shape_perceptual_hash_map WHERE phash_id IN ( SELECT phash_id FROM shape_perceptual_hash_map WHERE hash_id = ? );', ( hash_id, ) ) )
        
        similar_hash_ids_and_distances = [ ( similar_hash_id, 0 ) for similar_hash_id in similar_hash_ids ]
        
    else:
        
        search_radius = max_hamming_distance
        
        # the vptree root is the single node with no parent
        top_node_result = self._c.execute( 'SELECT phash_id FROM shape_vptree WHERE parent_id IS NULL;' ).fetchone()
        
        if top_node_result is None:
            
            # no tree at all--nothing to search
            return []
            
        
        ( root_node_phash_id, ) = top_node_result
        
        search = self._STL( self._c.execute( 'SELECT phash FROM shape_perceptual_hashes NATURAL JOIN shape_perceptual_hash_map WHERE hash_id = ?;', ( hash_id, ) ) )
        
        if len( search ) == 0:
            
            # the query file has no phashes--nothing to compare against
            return []
            
        
        similar_phash_ids_to_distances = {}
        
        num_cycles = 0
        total_nodes_searched = 0
        
        # breadth-first walk of the vptree, once per phash of the query file
        for search_phash in search:
            
            next_potentials = [ root_node_phash_id ]
            
            while len( next_potentials ) > 0:
                
                current_potentials = next_potentials
                next_potentials = []
                
                num_cycles += 1
                total_nodes_searched += len( current_potentials )
                
                for group_of_current_potentials in HydrusData.SplitListIntoChunks( current_potentials, 10000 ):
                    
                    # this is split into fixed lists of results of subgroups because as an iterable it was causing crashes on linux!!
                    # after investigation, it seemed to be SQLite having a problem with part of Get64BitHammingDistance touching phashes it presumably was still hanging on to
                    # the crash was in sqlite code, again presumably on subsequent fetch
                    # adding a delay in seemed to fix it as well. guess it was some memory maintenance buffer/bytes thing
                    # anyway, we now just get the whole lot of results first and then work on the whole lot
                    
                    '''
                    #old method
                    select_statement = 'SELECT phash_id, phash, radius, inner_id, outer_id FROM shape_perceptual_hashes NATURAL JOIN shape_vptree WHERE phash_id = ?;'
                    
                    results = list( self._ExecuteManySelectSingleParam( select_statement, group_of_current_potentials ) )
                    '''
                    
                    with HydrusDB.TemporaryIntegerTable( self._c, group_of_current_potentials, 'phash_id' ) as temp_table_name:
                        
                        # temp phash_ids to actual phashes and tree info
                        results = self._c.execute( 'SELECT phash_id, phash, radius, inner_id, outer_id FROM {} CROSS JOIN shape_perceptual_hashes USING ( phash_id ) CROSS JOIN shape_vptree USING ( phash_id );'.format( temp_table_name ) ).fetchall()
                        
                    
                    for ( node_phash_id, node_phash, node_radius, inner_phash_id, outer_phash_id ) in results:
                        
                        # first check the node itself--is it similar?
                        
                        node_hamming_distance = HydrusData.Get64BitHammingDistance( search_phash, node_phash )
                        
                        if node_hamming_distance <= search_radius:
                            
                            # keep the smallest distance seen across all of the query's phashes
                            if node_phash_id in similar_phash_ids_to_distances:
                                
                                current_distance = similar_phash_ids_to_distances[ node_phash_id ]
                                
                                similar_phash_ids_to_distances[ node_phash_id ] = min( node_hamming_distance, current_distance )
                                
                            else:
                                
                                similar_phash_ids_to_distances[ node_phash_id ] = node_hamming_distance
                                
                            
                        
                        # now how about its children?
                        
                        if node_radius is not None:
                            
                            # we have two spheres--node and search--their centers separated by node_hamming_distance
                            # we want to search inside/outside the node_sphere if the search_sphere intersects with those spaces
                            # there are four possibles:
                            # (----N----)-(--S--) intersects with outer only - distance between N and S > their radii
                            # (----N---(-)-S--) intersects with both
                            # (----N-(--S-)-) intersects with both
                            # (---(-N-S--)-) intersects with inner only - distance between N and S + radius_S does not exceed radius_N
                            
                            if inner_phash_id is not None:
                                
                                spheres_disjoint = node_hamming_distance > ( node_radius + search_radius )
                                
                                if not spheres_disjoint: # i.e. they intersect at some point
                                    
                                    next_potentials.append( inner_phash_id )
                                    
                                
                            
                            if outer_phash_id is not None:
                                
                                search_sphere_subset_of_node_sphere = ( node_hamming_distance + search_radius ) <= node_radius
                                
                                if not search_sphere_subset_of_node_sphere: # i.e. search sphere intersects with non-node sphere space at some point
                                    
                                    next_potentials.append( outer_phash_id )
                                    
                                
                            
                        
                    
                
            
        
        if HG.db_report_mode:
            
            HydrusData.ShowText( 'Similar file search touched {} nodes over {} cycles.'.format( HydrusData.ToHumanInt( total_nodes_searched ), HydrusData.ToHumanInt( num_cycles ) ) )
            
        
        # so, now we have phash_ids and distances. let's map that to actual files.
        # files can have multiple phashes, and phashes can refer to multiple files, so let's make sure we are setting the smallest distance we found
        
        similar_phash_ids = list( similar_phash_ids_to_distances.keys() )
        
        with HydrusDB.TemporaryIntegerTable( self._c, similar_phash_ids, 'phash_id' ) as temp_table_name:
            
            # temp phashes to hash map
            similar_phash_ids_to_hash_ids = HydrusData.BuildKeyToListDict( self._c.execute( 'SELECT phash_id, hash_id FROM {} CROSS JOIN shape_perceptual_hash_map USING ( phash_id );'.format( temp_table_name ) ) )
            
        
        similar_hash_ids_to_distances = {}
        
        for ( phash_id, hash_ids ) in similar_phash_ids_to_hash_ids.items():
            
            distance = similar_phash_ids_to_distances[ phash_id ]
            
            for hash_id in hash_ids:
                
                # a file keeps the minimum distance over all of its matched phashes
                if hash_id not in similar_hash_ids_to_distances:
                    
                    similar_hash_ids_to_distances[ hash_id ] = distance
                    
                else:
                    
                    current_distance = similar_hash_ids_to_distances[ hash_id ]
                    
                    if distance < current_distance:
                        
                        similar_hash_ids_to_distances[ hash_id ] = distance
                        
                    
                
            
        
        similar_hash_ids_and_distances = list( similar_hash_ids_to_distances.items() )
        
    
    return similar_hash_ids_and_distances
def MaintainTree(self, maintenance_mode=HC.MAINTENANCE_FORCED, job_key=None, stop_time=None):
    """Rebalance every branch of the similar-files vptree that is queued in
    shape_maintenance_branch_regen.
    
    Repeatedly picks the queued node with the largest subtree population and
    regenerates that branch until the queue is empty, the job is cancelled,
    or the controller says to stop. If no job_key is supplied, one is created
    and published as a modal message after five seconds of work.
    """
    
    time_started = HydrusData.GetNow()
    pub_job_key = False
    job_key_pubbed = False
    
    if job_key is None:
        
        job_key = ClientThreading.JobKey(cancellable=True)
        
        # we own this job_key, so we are responsible for publishing it
        pub_job_key = True
        
    
    try:
        
        job_key.SetVariable('popup_title', 'similar files metadata maintenance')
        
        rebalance_phash_ids = self._STL(
            self._c.execute(
                'SELECT phash_id FROM shape_maintenance_branch_regen;'))
        
        num_to_do = len(rebalance_phash_ids)
        
        while len(rebalance_phash_ids) > 0:
            
            # only surface the popup if the work is actually taking a while
            if pub_job_key and not job_key_pubbed and HydrusData.TimeHasPassed(
                    time_started + 5):
                
                HG.client_controller.pub('modal_message', job_key)
                
                job_key_pubbed = True
                
            
            (i_paused, should_quit) = job_key.WaitIfNeeded()
            
            should_stop = HG.client_controller.ShouldStopThisWork(
                maintenance_mode, stop_time=stop_time)
            
            if should_quit or should_stop:
                
                # bail early--the finally block still tidies the job_key up
                return
                
            
            num_done = num_to_do - len(rebalance_phash_ids)
            
            text = 'rebalancing similar file metadata - ' + HydrusData.ConvertValueRangeToPrettyString(
                num_done, num_to_do)
            
            HG.client_controller.frame_splash_status.SetSubtext(text)
            job_key.SetVariable('popup_text_1', text)
            job_key.SetVariable('popup_gauge_1', (num_done, num_to_do))
            
            with HydrusDB.TemporaryIntegerTable(
                    self._c, rebalance_phash_ids, 'phash_id') as temp_table_name:
                
                # temp phashes to tree: pick the queued node with the biggest
                # subtree, so one regeneration clears as much of the queue as possible
                (biggest_phash_id, ) = self._c.execute(
                    'SELECT phash_id FROM {} CROSS JOIN shape_vptree USING ( phash_id ) ORDER BY inner_population + outer_population DESC;'
                    .format(temp_table_name)).fetchone()
                
            
            self._RegenerateBranch(job_key, biggest_phash_id)
            
            # the regen above will have removed entries from the queue; refresh
            rebalance_phash_ids = self._STL(
                self._c.execute(
                    'SELECT phash_id FROM shape_maintenance_branch_regen;'))
            
        
    finally:
        
        job_key.SetVariable('popup_text_1', 'done!')
        
        job_key.DeleteVariable('popup_gauge_1')
        job_key.DeleteVariable('popup_text_2')  # used in the regenbranch call
        
        job_key.Finish()
        
        job_key.Delete(5)
def _RegenerateBranch(self, job_key, phash_id): job_key.SetVariable('popup_text_2', 'reviewing existing branch') # grab everything in the branch (parent_id, ) = self._c.execute( 'SELECT parent_id FROM shape_vptree WHERE phash_id = ?;', (phash_id, )).fetchone() cte_table_name = 'branch ( branch_phash_id )' initial_select = 'SELECT ?' recursive_select = 'SELECT phash_id FROM shape_vptree, branch ON parent_id = branch_phash_id' with_clause = 'WITH RECURSIVE ' + cte_table_name + ' AS ( ' + initial_select + ' UNION ALL ' + recursive_select + ')' unbalanced_nodes = self._c.execute( with_clause + ' SELECT branch_phash_id, phash FROM branch, shape_perceptual_hashes ON phash_id = branch_phash_id;', (phash_id, )).fetchall() # removal of old branch, maintenance schedule, and orphan phashes job_key.SetVariable( 'popup_text_2', HydrusData.ToHumanInt(len(unbalanced_nodes)) + ' leaves found--now clearing out old branch') unbalanced_phash_ids = {p_id for (p_id, p_h) in unbalanced_nodes} self._c.executemany('DELETE FROM shape_vptree WHERE phash_id = ?;', ((p_id, ) for p_id in unbalanced_phash_ids)) self._c.executemany( 'DELETE FROM shape_maintenance_branch_regen WHERE phash_id = ?;', ((p_id, ) for p_id in unbalanced_phash_ids)) with HydrusDB.TemporaryIntegerTable( self._c, unbalanced_phash_ids, 'phash_id') as temp_phash_ids_table_name: useful_phash_ids = self._STS( self._c.execute( 'SELECT phash_id FROM {} CROSS JOIN shape_perceptual_hash_map USING ( phash_id );' .format(temp_phash_ids_table_name))) orphan_phash_ids = unbalanced_phash_ids.difference(useful_phash_ids) self._c.executemany( 'DELETE FROM shape_perceptual_hashes WHERE phash_id = ?;', ((p_id, ) for p_id in orphan_phash_ids)) useful_nodes = [ row for row in unbalanced_nodes if row[0] in useful_phash_ids ] useful_population = len(useful_nodes) # now create the new branch, starting by choosing a new root and updating the parent's left/right reference to that if useful_population > 0: (new_phash_id, new_phash) = 
self._PopBestRootNode( useful_nodes) #HydrusData.RandomPop( useful_nodes ) else: new_phash_id = None if parent_id is not None: (parent_inner_id, ) = self._c.execute( 'SELECT inner_id FROM shape_vptree WHERE phash_id = ?;', (parent_id, )).fetchone() if parent_inner_id == phash_id: query = 'UPDATE shape_vptree SET inner_id = ?, inner_population = ? WHERE phash_id = ?;' else: query = 'UPDATE shape_vptree SET outer_id = ?, outer_population = ? WHERE phash_id = ?;' self._c.execute(query, (new_phash_id, useful_population, parent_id)) if useful_population > 0: self._GenerateBranch(job_key, parent_id, new_phash_id, new_phash, useful_nodes)
def _PopulateHashIdsToHashesCache( self, hash_ids, exception_on_error = False ):
    """Ensure every id in hash_ids has an entry in self._hash_ids_to_hashes_cache.
    
    Looks hashes up from the master hashes table. A hash_id with no row there
    is an orphan id: either raise DataMissing (exception_on_error=True) or
    log loudly and substitute a recognisable random placeholder hash so the
    caller can continue.
    """
    
    # cap cache memory: once it grows past 100k entries, keep only the ids
    # being requested right now
    if len( self._hash_ids_to_hashes_cache ) > 100000:
        
        if not isinstance( hash_ids, set ):
            
            hash_ids = set( hash_ids )
            
        
        self._hash_ids_to_hashes_cache = { hash_id : hash for ( hash_id, hash ) in self._hash_ids_to_hashes_cache.items() if hash_id in hash_ids }
        
    
    uncached_hash_ids = { hash_id for hash_id in hash_ids if hash_id not in self._hash_ids_to_hashes_cache }
    
    if len( uncached_hash_ids ) > 0:
        
        # only show the scary user-facing message once per call
        pubbed_error = False
        
        if len( uncached_hash_ids ) == 1:
            
            ( uncached_hash_id, ) = uncached_hash_ids
            
            rows = self._c.execute( 'SELECT hash_id, hash FROM hashes WHERE hash_id = ?;', ( uncached_hash_id, ) ).fetchall()
            
        else:
            
            with HydrusDB.TemporaryIntegerTable( self._c, uncached_hash_ids, 'hash_id' ) as temp_table_name:
                
                # temp hash_ids to actual hashes
                rows = self._c.execute( 'SELECT hash_id, hash FROM {} CROSS JOIN hashes USING ( hash_id );'.format( temp_table_name ) ).fetchall()
                
            
        
        uncached_hash_ids_to_hashes = dict( rows )
        
        # fewer rows than requested ids means some hash_ids have no master
        # row--that is a serious orphan-id condition
        if len( uncached_hash_ids_to_hashes ) < len( uncached_hash_ids ):
            
            for hash_id in uncached_hash_ids:
                
                if hash_id not in uncached_hash_ids_to_hashes:
                    
                    if exception_on_error:
                        
                        raise HydrusExceptions.DataMissing( 'Did not find all entries for those hash ids!' )
                        
                    
                    HydrusData.DebugPrint( 'Database hash error: hash_id ' + str( hash_id ) + ' was missing!' )
                    HydrusData.PrintException( Exception( 'Missing file identifier stack trace.' ) )
                    
                    if not pubbed_error:
                        
                        HydrusData.ShowText( 'A file identifier was missing! This is a serious error that means your client database has an orphan file id! Think about contacting hydrus dev!' )
                        
                        pubbed_error = True
                        
                    
                    # substitute a recognisable placeholder: fixed 'aaaa...' prefix plus random tail
                    hash = bytes.fromhex( 'aaaaaaaaaaaaaaaa' ) + os.urandom( 16 )
                    
                    uncached_hash_ids_to_hashes[ hash_id ] = hash
                    
                
            
        
        self._hash_ids_to_hashes_cache.update( uncached_hash_ids_to_hashes )