def DisassociatePHashes(self, hash_id, phash_ids):
    """Unlink the given perceptual hashes from a file and queue any
    now-unreferenced phashes for search-tree branch regeneration.

    phash_ids must be a set (we call .difference on it).
    """
    
    self._c.executemany(
        'DELETE FROM shape_perceptual_hash_map WHERE phash_id = ? AND hash_id = ?;',
        ((phash_id, hash_id) for phash_id in phash_ids)
    )
    
    # a phash still mapped to some other file remains useful
    still_in_use = {
        phash_id
        for (phash_id,) in self._c.execute(
            'SELECT phash_id FROM shape_perceptual_hash_map WHERE phash_id IN ' + HydrusData.SplayListForDB(phash_ids) + ';'
        )
    }
    
    orphaned_phash_ids = phash_ids.difference(still_in_use)
    
    # orphans get their branch of the phash search tree rebuilt during maintenance
    self._c.executemany(
        'INSERT OR IGNORE INTO shape_maintenance_branch_regen ( phash_id ) VALUES ( ? );',
        ((phash_id,) for phash_id in orphaned_phash_ids)
    )
def GetFileHashes(self, given_hashes, given_hash_type, desired_hash_type) -> typing.Collection[bytes]:
    """Translate file hashes from one digest type to another via local_hashes.
    
    given_hashes that are None, or that the local cache does not know,
    are silently dropped, so the result may be shorter than the input.
    """
    
    if given_hash_type == 'sha256':
        
        hash_ids = self.GetHashIds(given_hashes)
        
    else:
        
        hash_ids = []
        
        for given_hash in given_hashes:
            
            if given_hash is None:
                
                continue
                
            
            # column name comes from given_hash_type (internal value), params are bound
            row = self._Execute(
                'SELECT hash_id FROM local_hashes WHERE {} = ?;'.format(given_hash_type),
                (sqlite3.Binary(given_hash),)
            ).fetchone()
            
            if row is not None:
                
                (hash_id,) = row
                
                hash_ids.append(hash_id)
                
            
        
    
    if desired_hash_type == 'sha256':
        
        return self.GetHashes(hash_ids)
        
    
    return [
        desired_hash
        for (desired_hash,) in self._Execute(
            'SELECT {} FROM local_hashes WHERE hash_id IN {};'.format(desired_hash_type, HydrusData.SplayListForDB(hash_ids))
        )
    ]
def GetCurrentFilesCount(self, service_id, only_viewable=False):
    """Count a service's current files, optionally limited to viewable mimes."""
    
    table_name = GenerateFilesTableName(service_id, HC.CONTENT_STATUS_CURRENT)
    
    if only_viewable:
        
        # hashes to mimes
        query = 'SELECT COUNT( * ) FROM {} CROSS JOIN files_info USING ( hash_id ) WHERE mime IN {};'.format(
            table_name, HydrusData.SplayListForDB(HC.SEARCHABLE_MIMES)
        )
        
    else:
        
        query = 'SELECT COUNT( * ) FROM {};'.format(table_name)
        
    
    # COUNT(*) always yields exactly one row
    (count,) = self._c.execute(query).fetchone()
    
    return count
def GetRepositoryUpdateHashesICanProcess(self, service_key: bytes, content_types_to_process):
    """Work out which repository update files are ready to be processed.

    Returns a 4-tuple:
        (this_is_first_definitions_work, definition_hashes_and_content_types,
         this_is_first_content_work, content_hashes_and_content_types)
    where each *_hashes_and_content_types is a list of (hash, content_types) pairs,
    ordered by update index.
    """
    
    # it is important that we use lists and sort by update index!
    # otherwise add/delete actions can occur in the wrong order
    
    service_id = self.modules_services.GetServiceId(service_key)
    
    (repository_updates_table_name, repository_unregistered_updates_table_name, repository_updates_processed_table_name) = GenerateRepositoryUpdatesTableNames(service_id)
    
    # has any definitions work been done for this service yet?
    result = self._Execute('SELECT 1 FROM {} WHERE content_type = ? AND processed = ?;'.format(repository_updates_processed_table_name), (HC.CONTENT_TYPE_DEFINITIONS, True)).fetchone()
    
    this_is_first_definitions_work = result is None
    
    # has any non-definitions (content) work been done yet?
    result = self._Execute('SELECT 1 FROM {} WHERE content_type != ? AND processed = ?;'.format(repository_updates_processed_table_name), (HC.CONTENT_TYPE_DEFINITIONS, True)).fetchone()
    
    this_is_first_content_work = result is None
    
    min_unregistered_update_index = None
    
    # MIN() over an empty join yields a single (None,) row, so the unpack below
    # can legitimately re-assign None; the later 'is not None' check handles that
    result = self._Execute('SELECT MIN( update_index ) FROM {} CROSS JOIN {} USING ( hash_id );'.format(repository_unregistered_updates_table_name, repository_updates_table_name)).fetchone()
    
    if result is not None:
        
        (min_unregistered_update_index,) = result
        
    
    predicate_phrase = 'processed = ? AND content_type IN {}'.format(HydrusData.SplayListForDB(content_types_to_process))
    
    if min_unregistered_update_index is not None:
        
        # can't process an update if any of its files are as yet unregistered (these are both unprocessed and unavailable)
        # also, we mustn't skip any update indices, so if there is an invalid one, we won't do any after that!
        
        predicate_phrase = '{} AND update_index < {}'.format(predicate_phrase, min_unregistered_update_index)
        
    
    query = 'SELECT update_index, hash_id, content_type FROM {} CROSS JOIN {} USING ( hash_id ) WHERE {};'.format(repository_updates_processed_table_name, repository_updates_table_name, predicate_phrase)
    
    # (False,) binds processed = ? -> only unprocessed rows
    rows = self._Execute(query, (False,)).fetchall()
    
    # update_index -> set of hash_ids still needing work
    update_indices_to_unprocessed_hash_ids = HydrusData.BuildKeyToSetDict(((update_index, hash_id) for (update_index, hash_id, content_type) in rows))
    
    # hash_id -> set of content types still to process for that update file
    hash_ids_to_content_types_to_process = HydrusData.BuildKeyToSetDict(((hash_id, content_type) for (update_index, hash_id, content_type) in rows))
    
    all_hash_ids = set(hash_ids_to_content_types_to_process.keys())
    
    all_local_hash_ids = self.modules_files_storage.FilterHashIdsToStatus(self.modules_services.local_update_service_id, all_hash_ids, HC.CONTENT_STATUS_CURRENT)
    
    for sorted_update_index in sorted(update_indices_to_unprocessed_hash_ids.keys()):
        
        unprocessed_hash_ids = update_indices_to_unprocessed_hash_ids[sorted_update_index]
        
        if not unprocessed_hash_ids.issubset(all_local_hash_ids):
            
            # can't process an update if any of its unprocessed files are not local
            # normally they'll always be available if registered, but just in case a user deletes one manually etc...
            # also, we mustn't skip any update indices, so if there is an invalid one, we won't do any after that!
            
            # truncate the plan at the first incomplete update index
            update_indices_to_unprocessed_hash_ids = {update_index: unprocessed_hash_ids for (update_index, unprocessed_hash_ids) in update_indices_to_unprocessed_hash_ids.items() if update_index < sorted_update_index}
            
            break
            
        
    
    # all the hashes are now good to go
    
    all_hash_ids = set(itertools.chain.from_iterable(update_indices_to_unprocessed_hash_ids.values()))
    
    hash_ids_to_hashes = self.modules_hashes_local_cache.GetHashIdsToHashes(hash_ids=all_hash_ids)
    
    definition_hashes_and_content_types = []
    content_hashes_and_content_types = []
    
    if len(update_indices_to_unprocessed_hash_ids) > 0:
        
        # walk updates in index order so add/delete actions replay correctly
        for update_index in sorted(update_indices_to_unprocessed_hash_ids.keys()):
            
            unprocessed_hash_ids = update_indices_to_unprocessed_hash_ids[update_index]
            
            # definition updates must be handled separately (and first) from content updates
            definition_hash_ids = {hash_id for hash_id in unprocessed_hash_ids if HC.CONTENT_TYPE_DEFINITIONS in hash_ids_to_content_types_to_process[hash_id]}
            content_hash_ids = {hash_id for hash_id in unprocessed_hash_ids if hash_id not in definition_hash_ids}
            
            for (hash_ids, hashes_and_content_types) in [(definition_hash_ids, definition_hashes_and_content_types), (content_hash_ids, content_hashes_and_content_types)]:
                
                hashes_and_content_types.extend(((hash_ids_to_hashes[hash_id], hash_ids_to_content_types_to_process[hash_id]) for hash_id in hash_ids))
                
            
        
    
    return (this_is_first_definitions_work, definition_hashes_and_content_types, this_is_first_content_work, content_hashes_and_content_types)