def Generate( self, file_service_id, tag_service_id ):
    """
    Build the cache for one ( file service, tag service ) pair from scratch.
    
    Creates the tables, passes every hash id returned by
    modules_files_storage.GetCurrentHashIdsList for the file service through
    AddFiles in blocks, creates the indices afterwards, touches the
    analyze-new-tables maintenance hook, and finally calls Generate on
    modules_mappings_cache_specific_display with populate_from_storage = True.
    """
    
    BLOCK_SIZE = 10000
    
    self.CreateTables( file_service_id, tag_service_id )
    
    #
    
    current_hash_ids = self.modules_files_storage.GetCurrentHashIdsList( file_service_id )
    
    for hash_id_block in HydrusData.SplitListIntoChunks( current_hash_ids, BLOCK_SIZE ):
        
        # each block is exposed to AddFiles both as a list and as a temp table
        with self._MakeTemporaryIntegerTable( hash_id_block, 'hash_id' ) as temp_table_name:
            
            self.AddFiles( file_service_id, tag_service_id, hash_id_block, temp_table_name )
            
        
    
    # indices are only created once all the rows have been added
    index_spec = self._GetServiceIndexGenerationDictSingle( file_service_id, tag_service_id )
    
    for ( table_name, columns, unique, _version_added ) in self._FlattenIndexGenerationDict( index_spec ):
        
        self._CreateIndex( table_name, columns, unique = unique )
        
    
    self.modules_db_maintenance.TouchAnalyzeNewTables()
    
    self.modules_mappings_cache_specific_display.Generate(
        file_service_id,
        tag_service_id,
        populate_from_storage = True
    )
def do_it(hash_ids):
    """
    Re-read tags managers for hash_ids and push them into cached media results.

    Works through hash_ids in groups of 256. For each group the fresh tags
    managers are read via the 'force_refresh_tags_managers' call, then, while
    holding self._lock, every media result still present in
    self._hash_ids_to_media_results receives its new tags manager. Stops
    silently if the thread is shutting down; otherwise publishes
    'refresh_all_tag_presentation_gui' once all groups are done.
    """
    for hash_id_batch in HydrusData.SplitListIntoChunks(hash_ids, 256):

        if HydrusThreading.IsThreadShuttingDown():
            return

        fresh_tags_managers = HG.client_controller.Read(
            'force_refresh_tags_managers', hash_id_batch)

        with self._lock:

            for (hash_id, tags_manager) in fresh_tags_managers.items():

                cached_result = self._hash_ids_to_media_results.get(hash_id)

                # nothing to update if the media result is not in the cache
                if cached_result is None:
                    continue

                cached_result.SetTagsManager(tags_manager)

    HG.client_controller.pub('refresh_all_tag_presentation_gui')
def Repopulate(self):
    """
    Clear the cache and refill it from the combined local file service.

    Reads the current hash ids of
    self.modules_services.combined_local_file_service_id and hands them to
    AddHashIdsToCache in blocks of 10000, updating the splash status subtext
    before each block.
    """
    BLOCK_SIZE = 10000

    self.ClearCache()

    HG.client_controller.frame_splash_status.SetSubtext(
        'reading local file data')

    local_hash_ids = self.modules_files_storage.GetCurrentHashIdsList(
        self.modules_services.combined_local_file_service_id)

    num_to_do = len(local_hash_ids)

    hash_id_blocks = HydrusData.SplitListIntoChunks(local_hash_ids, BLOCK_SIZE)

    for (block_index, hash_id_block) in enumerate(hash_id_blocks):

        # progress is reported as the number of ids handed over before this block
        num_done = block_index * BLOCK_SIZE

        progress_text = HydrusData.ConvertValueRangeToPrettyString(
            num_done, num_to_do)

        HG.client_controller.frame_splash_status.SetSubtext(
            'caching local file data {}'.format(progress_text))

        self.AddHashIdsToCache(hash_id_block)
def _DoExport( self ):
    """
    Run one export pass for this export folder.
    
    Steps, in order:
    
    1. Read the hash ids for self._file_search_context and load their media
       results in chunks of 256.
    2. Mirror every file to its generated destination under self._path.
    3. If the export type is HC.EXPORT_FOLDER_TYPE_SYNCHRONISE, delete files
       that were under self._path before the run but were not exported this
       time, then delete sub-directories that ended up empty.
    4. If self._delete_from_client_after_export is set, delete the exported
       files from every HC.LOCAL_FILE_DOMAIN service they are currently in,
       skipping non-inbox files when 'delete_lock_for_archived_files' is on.
    
    Returns early, leaving the pass unfinished, whenever the
    'pause_export_folders_sync' option is set or the thread is shutting down.
    
    Raises Exception if a file is missing from client file storage or if a
    generated destination path does not start with self._path.
    """
    
    CHUNK_SIZE = 256
    
    query_hash_ids = HG.client_controller.Read( 'file_query_ids', self._file_search_context )
    
    media_results = []
    
    for start in range( 0, len( query_hash_ids ), CHUNK_SIZE ):
        
        if HG.client_controller.new_options.GetBoolean( 'pause_export_folders_sync' ) or HydrusThreading.IsThreadShuttingDown():
            
            return
            
        
        sub_query_hash_ids = query_hash_ids[ start : start + CHUNK_SIZE ]
        
        more_media_results = HG.client_controller.Read( 'media_results_from_ids', sub_query_hash_ids )
        
        media_results.extend( more_media_results )
        
    
    media_results.sort( key = lambda mr: mr.GetHashId() )
    
    #
    
    terms = ParseExportPhrase( self._phrase )
    
    # everything already on disk under the export folder before this run
    previous_paths = set()
    
    for ( root, dirnames, filenames ) in os.walk( self._path ):
        
        previous_paths.update( ( os.path.join( root, filename ) for filename in filenames ) )
        
    
    # every destination path this run considers part of the export
    sync_paths = set()
    
    client_files_manager = HG.client_controller.client_files_manager
    
    num_copied = 0
    
    for media_result in media_results:
        
        if HG.client_controller.new_options.GetBoolean( 'pause_export_folders_sync' ) or HydrusThreading.IsThreadShuttingDown():
            
            return
            
        
        file_hash = media_result.GetHash()
        mime = media_result.GetMime()
        
        try:
            
            source_path = client_files_manager.GetFilePath( file_hash, mime )
            
        except HydrusExceptions.FileMissingException as e:
            
            # the message has a placeholder for the hash, so it must actually be formatted in
            raise Exception( 'A file to be exported, hash "{}", was missing! You should run file maintenance (under database->maintenance->files) to check the files for the export folder\'s search, and possibly all your files.'.format( file_hash.hex() ) ) from e
            
        
        filename = GenerateExportFilename( self._path, media_result, terms )
        
        dest_path = os.path.normpath( os.path.join( self._path, filename ) )
        
        # NOTE(review): this is a plain string-prefix test, so a sibling directory whose
        # name merely begins with self._path would also pass -- confirm whether a
        # stricter containment check is wanted
        if not dest_path.startswith( self._path ):
            
            raise Exception( 'It seems a destination path for export folder "{}" was above the main export directory! The file was "{}" and its destination path was "{}".'.format( self._path, file_hash.hex(), dest_path ) )
            
        
        dest_path_dir = os.path.dirname( dest_path )
        
        HydrusPaths.MakeSureDirectoryExists( dest_path_dir )
        
        # two media results can generate the same filename; only the first one is written
        if dest_path not in sync_paths:
            
            copied = HydrusPaths.MirrorFile( source_path, dest_path )
            
            if copied:
                
                num_copied += 1
                
                HydrusPaths.TryToGiveFileNicePermissionBits( dest_path )
                
            
        
        sync_paths.add( dest_path )
        
    
    if num_copied > 0:
        
        HydrusData.Print( 'Export folder ' + self._name + ' exported ' + HydrusData.ToHumanInt( num_copied ) + ' files.' )
        
    
    if self._export_type == HC.EXPORT_FOLDER_TYPE_SYNCHRONISE:
        
        deletee_paths = previous_paths.difference( sync_paths )
        
        for deletee_path in deletee_paths:
            
            ClientPaths.DeletePath( deletee_path )
            
        
        deletee_dirs = set()
        
        # bottom-up walk, so child directories are judged before their parents
        for ( root, dirnames, filenames ) in os.walk( self._path, topdown = False ):
            
            if root == self._path:
                
                continue
                
            
            no_files = len( filenames ) == 0
            
            useful_dirnames = [ dirname for dirname in dirnames if os.path.join( root, dirname ) not in deletee_dirs ]
            
            no_useful_dirs = len( useful_dirnames ) == 0
            
            if no_useful_dirs and no_files:
                
                deletee_dirs.add( root )
                
            
        
        for deletee_dir in deletee_dirs:
            
            # deleting a parent may already have removed this directory
            if os.path.exists( deletee_dir ):
                
                HydrusPaths.DeletePath( deletee_dir )
                
            
        
        if len( deletee_paths ) > 0:
            
            HydrusData.Print( 'Export folder {} deleted {} files and {} folders.'.format( self._name, HydrusData.ToHumanInt( len( deletee_paths ) ), HydrusData.ToHumanInt( len( deletee_dirs ) ) ) )
            
        
    
    if self._delete_from_client_after_export:
        
        local_file_service_keys = HG.client_controller.services_manager.GetServiceKeys( ( HC.LOCAL_FILE_DOMAIN, ) )
        
        service_keys_to_deletee_hashes = collections.defaultdict( list )
        
        delete_lock_for_archived_files = HG.client_controller.new_options.GetBoolean( 'delete_lock_for_archived_files' )
        
        for media_result in media_results:
            
            if delete_lock_for_archived_files and not media_result.GetInbox():
                
                continue
                
            
            file_hash = media_result.GetHash()
            
            deletee_service_keys = media_result.GetLocationsManager().GetCurrent().intersection( local_file_service_keys )
            
            for deletee_service_key in deletee_service_keys:
                
                service_keys_to_deletee_hashes[ deletee_service_key ].append( file_hash )
                
            
        
        reason = 'Deleted after export to Export Folder "{}".'.format( self._path )
        
        for ( service_key, deletee_hashes ) in service_keys_to_deletee_hashes.items():
            
            for chunk_of_hashes in HydrusData.SplitListIntoChunks( deletee_hashes, 64 ):
                
                content_update = HydrusData.ContentUpdate( HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_DELETE, chunk_of_hashes, reason = reason )
                
                HG.client_controller.WriteSynchronous( 'content_updates', { service_key : [ content_update ] } )
def Search(self, hash_id, max_hamming_distance):
    """
    Find files whose perceptual hashes are within a hamming distance of this file's.

    Returns a list of ( hash_id, distance ) pairs.

    A max_hamming_distance of 0 is answered directly from
    shape_perceptual_hash_map: every file sharing a phash_id with hash_id, at
    distance 0. Any other value walks the shape_vptree table level by level
    from its root row, once per perceptual hash of the file, and keeps the
    smallest distance seen for each matching file. An empty list is returned
    when the tree has no root row or the file has no perceptual hashes.
    """
    if max_hamming_distance == 0:

        exact_hash_ids = self._STL(
            self._c.execute(
                'SELECT hash_id FROM shape_perceptual_hash_map WHERE phash_id IN ( SELECT phash_id FROM shape_perceptual_hash_map WHERE hash_id = ? );',
                (hash_id, )))

        return [(exact_hash_id, 0) for exact_hash_id in exact_hash_ids]

    search_radius = max_hamming_distance

    root_row = self._c.execute(
        'SELECT phash_id FROM shape_vptree WHERE parent_id IS NULL;'
    ).fetchone()

    if root_row is None:
        return []

    (root_node_phash_id, ) = root_row

    search_phashes = self._STL(
        self._c.execute(
            'SELECT phash FROM shape_perceptual_hashes NATURAL JOIN shape_perceptual_hash_map WHERE hash_id = ?;',
            (hash_id, )))

    if len(search_phashes) == 0:
        return []

    phash_ids_to_best_distance = {}

    num_cycles = 0
    total_nodes_searched = 0

    for search_phash in search_phashes:

        frontier = [root_node_phash_id]

        while len(frontier) > 0:

            (this_level, frontier) = (frontier, [])

            num_cycles += 1
            total_nodes_searched += len(this_level)

            for node_id_group in HydrusData.SplitListIntoChunks(
                    this_level, 10000):

                # this is split into fixed lists of results of subgroups because as an iterable it was causing crashes on linux!!
                # after investigation, it seemed to be SQLite having a problem with part of Get64BitHammingDistance touching phashes it presumably was still hanging on to
                # the crash was in sqlite code, again presumably on subsequent fetch
                # adding a delay in seemed to fix it as well. guess it was some memory maintenance buffer/bytes thing
                # anyway, we now just get the whole lot of results first and then work on the whole lot
                # (the old method ran a per-phash_id select against shape_perceptual_hashes NATURAL JOIN shape_vptree
                # through self._ExecuteManySelectSingleParam)

                with HydrusDB.TemporaryIntegerTable(
                        self._c, node_id_group,
                        'phash_id') as temp_table_name:

                    # temp phash_ids to actual phashes and tree info
                    node_rows = self._c.execute(
                        'SELECT phash_id, phash, radius, inner_id, outer_id FROM {} CROSS JOIN shape_perceptual_hashes USING ( phash_id ) CROSS JOIN shape_vptree USING ( phash_id );'
                        .format(temp_table_name)).fetchall()

                for (node_phash_id, node_phash, node_radius, inner_phash_id,
                     outer_phash_id) in node_rows:

                    # first check the node itself--is it similar?
                    node_distance = HydrusData.Get64BitHammingDistance(
                        search_phash, node_phash)

                    if node_distance <= search_radius:

                        best_so_far = phash_ids_to_best_distance.get(
                            node_phash_id, node_distance)

                        phash_ids_to_best_distance[node_phash_id] = min(
                            node_distance, best_so_far)

                    # now how about its children?
                    if node_radius is None:
                        continue

                    # we have two spheres--node and search--their centers separated by node_distance
                    # we want to search inside/outside the node_sphere if the search_sphere intersects with those spaces
                    # there are four possibles:
                    # (----N----)-(--S--)    intersects with outer only - distance between N and S > their radii
                    # (----N---(-)-S--)      intersects with both
                    # (----N-(--S-)-)        intersects with both
                    # (---(-N-S--)-)         intersects with inner only - distance between N and S + radius_S does not exceed radius_N

                    # inner child: wanted unless the two spheres are disjoint
                    if inner_phash_id is not None and node_distance <= (
                            node_radius + search_radius):
                        frontier.append(inner_phash_id)

                    # outer child: wanted unless the search sphere sits wholly inside the node sphere
                    if outer_phash_id is not None and (
                            node_distance + search_radius) > node_radius:
                        frontier.append(outer_phash_id)

    if HG.db_report_mode:

        HydrusData.ShowText(
            'Similar file search touched {} nodes over {} cycles.'.format(
                HydrusData.ToHumanInt(total_nodes_searched),
                HydrusData.ToHumanInt(num_cycles)))

    # so, now we have phash_ids and distances. let's map that to actual files.
    # files can have multiple phashes, and phashes can refer to multiple files, so let's make sure we are setting the smallest distance we found

    similar_phash_ids = list(phash_ids_to_best_distance.keys())

    with HydrusDB.TemporaryIntegerTable(self._c, similar_phash_ids,
                                        'phash_id') as temp_table_name:

        # temp phashes to hash map
        phash_ids_to_hash_ids = HydrusData.BuildKeyToListDict(
            self._c.execute(
                'SELECT phash_id, hash_id FROM {} CROSS JOIN shape_perceptual_hash_map USING ( phash_id );'
                .format(temp_table_name)))

    hash_ids_to_best_distance = {}

    for (phash_id, matching_hash_ids) in phash_ids_to_hash_ids.items():

        distance = phash_ids_to_best_distance[phash_id]

        for matching_hash_id in matching_hash_ids:

            known_distance = hash_ids_to_best_distance.get(matching_hash_id)

            if known_distance is None or distance < known_distance:
                hash_ids_to_best_distance[matching_hash_id] = distance

    return list(hash_ids_to_best_distance.items())
def _DoExport( self ):
    """
    Run one export pass for this export folder.
    
    Steps, in order:
    
    1. Read the hash ids for self._file_search_context and load their media
       results in chunks of 256.
    2. Mirror every file to its generated destination under self._path.
    3. If the export type is HC.EXPORT_FOLDER_TYPE_SYNCHRONISE, delete files
       that were under self._path before the run but were not exported this
       time, then delete sub-directories that ended up empty.
    4. If self._delete_from_client_after_export is set, delete all exported
       hashes from CC.LOCAL_FILE_SERVICE_KEY in chunks of 64.
    
    Returns early, leaving the pass unfinished, whenever
    HC.options[ 'pause_export_folders_sync' ] is set or the thread is shutting
    down.
    
    Raises Exception if a generated destination path does not start with
    self._path.
    """
    
    CHUNK_SIZE = 256
    
    query_hash_ids = HG.client_controller.Read( 'file_query_ids', self._file_search_context )
    
    media_results = []
    
    for start in range( 0, len( query_hash_ids ), CHUNK_SIZE ):
        
        if HC.options[ 'pause_export_folders_sync' ] or HydrusThreading.IsThreadShuttingDown():
            
            return
            
        
        sub_query_hash_ids = query_hash_ids[ start : start + CHUNK_SIZE ]
        
        more_media_results = HG.client_controller.Read( 'media_results_from_ids', sub_query_hash_ids )
        
        media_results.extend( more_media_results )
        
    
    media_results.sort( key = lambda mr: mr.GetHashId() )
    
    #
    
    terms = ParseExportPhrase( self._phrase )
    
    # everything already on disk under the export folder before this run
    previous_paths = set()
    
    for ( root, dirnames, filenames ) in os.walk( self._path ):
        
        previous_paths.update( ( os.path.join( root, filename ) for filename in filenames ) )
        
    
    # every destination path this run considers part of the export
    sync_paths = set()
    
    client_files_manager = HG.client_controller.client_files_manager
    
    num_copied = 0
    
    for media_result in media_results:
        
        if HC.options[ 'pause_export_folders_sync' ] or HydrusThreading.IsThreadShuttingDown():
            
            return
            
        
        file_hash = media_result.GetHash()
        mime = media_result.GetMime()
        
        source_path = client_files_manager.GetFilePath( file_hash, mime )
        
        filename = GenerateExportFilename( self._path, media_result, terms )
        
        dest_path = os.path.normpath( os.path.join( self._path, filename ) )
        
        # NOTE(review): this is a plain string-prefix test, so a sibling directory whose
        # name merely begins with self._path would also pass -- confirm whether a
        # stricter containment check is wanted
        if not dest_path.startswith( self._path ):
            
            raise Exception( 'It seems a destination path for export folder "{}" was above the main export directory! The file was "{}" and its destination path was "{}".'.format( self._path, file_hash.hex(), dest_path ) )
            
        
        dest_path_dir = os.path.dirname( dest_path )
        
        HydrusPaths.MakeSureDirectoryExists( dest_path_dir )
        
        # two media results can generate the same filename; only the first one is written
        if dest_path not in sync_paths:
            
            copied = HydrusPaths.MirrorFile( source_path, dest_path )
            
            if copied:
                
                num_copied += 1
                
                HydrusPaths.MakeFileWritable( dest_path )
                
            
        
        sync_paths.add( dest_path )
        
    
    if num_copied > 0:
        
        HydrusData.Print( 'Export folder ' + self._name + ' exported ' + HydrusData.ToHumanInt( num_copied ) + ' files.' )
        
    
    if self._export_type == HC.EXPORT_FOLDER_TYPE_SYNCHRONISE:
        
        deletee_paths = previous_paths.difference( sync_paths )
        
        for deletee_path in deletee_paths:
            
            ClientPaths.DeletePath( deletee_path )
            
        
        deletee_dirs = set()
        
        # bottom-up walk, so child directories are judged before their parents
        for ( root, dirnames, filenames ) in os.walk( self._path, topdown = False ):
            
            if root == self._path:
                
                continue
                
            
            no_files = len( filenames ) == 0
            
            useful_dirnames = [ dirname for dirname in dirnames if os.path.join( root, dirname ) not in deletee_dirs ]
            
            no_useful_dirs = len( useful_dirnames ) == 0
            
            if no_useful_dirs and no_files:
                
                deletee_dirs.add( root )
                
            
        
        for deletee_dir in deletee_dirs:
            
            # deleting a parent may already have removed this directory
            if os.path.exists( deletee_dir ):
                
                HydrusPaths.DeletePath( deletee_dir )
                
            
        
        if len( deletee_paths ) > 0:
            
            HydrusData.Print( 'Export folder {} deleted {} files and {} folders.'.format( self._name, HydrusData.ToHumanInt( len( deletee_paths ) ), HydrusData.ToHumanInt( len( deletee_dirs ) ) ) )
            
        
    
    if self._delete_from_client_after_export:
        
        deletee_hashes = { media_result.GetHash() for media_result in media_results }
        
        reason = 'Deleted after export to Export Folder "{}".'.format( self._path )
        
        for chunk_of_hashes in HydrusData.SplitListIntoChunks( deletee_hashes, 64 ):
            
            content_update = HydrusData.ContentUpdate( HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_DELETE, chunk_of_hashes, reason = reason )
            
            HG.client_controller.WriteSynchronous( 'content_updates', { CC.LOCAL_FILE_SERVICE_KEY : [ content_update ] } )
def do_it(directory, neighbouring_txt_tag_service_keys, delete_afterwards,
          export_symlinks, quit_afterwards):
    """
    Export every ( ordering_index, media, path ) entry of to_do into directory.

    Progress is reported through a cancellable job key popup and through
    qt_update_label (scheduled with QP.CallAfter). For each entry the
    destination directory is created, an optional '.txt' tag sidecar is
    written when export_tag_txts is set, and the file is either symlinked or
    mirrored to the destination path.

    directory: export root; every destination path must start with it.
    neighbouring_txt_tag_service_keys: tag services whose current tags go into
        the sidecar file.
    delete_afterwards: delete the exported files from CC.LOCAL_FILE_SERVICE_KEY
        once the export has completed. Skipped when the job was cancelled or
        an export step failed, so files that never reached the export
        directory are not deleted.
    export_symlinks: create symlinks instead of mirroring the files.
    quit_afterwards: passed through to qt_done at the end.

    The first failure is shown in a message box and stops the export.
    """
    job_key = ClientThreading.JobKey(cancellable=True)

    job_key.SetStatusTitle('file export')

    HG.client_controller.pub('message', job_key)

    pauser = HydrusData.BigJobPauser()

    export_failed = False

    for (index, (ordering_index, media, path)) in enumerate(to_do):

        if job_key.IsCancelled():
            break

        try:

            x_of_y = HydrusData.ConvertValueRangeToPrettyString(
                index + 1, num_to_do)

            job_key.SetVariable('popup_text_1', 'Done {}'.format(x_of_y))
            job_key.SetVariable('popup_gauge_1', (index + 1, num_to_do))

            QP.CallAfter(qt_update_label, x_of_y)

            file_hash = media.GetHash()
            mime = media.GetMime()

            path = os.path.normpath(path)

            if not path.startswith(directory):

                raise Exception(
                    'It seems a destination path was above the main export directory! The file was "{}" and its destination path was "{}".'
                    .format(file_hash.hex(), path))

            path_dir = os.path.dirname(path)

            HydrusPaths.MakeSureDirectoryExists(path_dir)

            if export_tag_txts:

                tags_manager = media.GetTagsManager()

                tags = set()

                for service_key in neighbouring_txt_tag_service_keys:

                    current_tags = tags_manager.GetCurrent(
                        service_key, ClientTags.TAG_DISPLAY_ACTUAL)

                    tags.update(current_tags)

                tags = sorted(tags)

                txt_path = path + '.txt'

                with open(txt_path, 'w', encoding='utf-8') as f:

                    # the file is in text mode, which already translates '\n' to the
                    # platform line ending; joining on os.linesep would give '\r\r\n'
                    # on Windows
                    f.write('\n'.join(tags))

            source_path = client_files_manager.GetFilePath(
                file_hash, mime, check_file_exists=False)

            if export_symlinks:

                os.symlink(source_path, path)

            else:

                HydrusPaths.MirrorFile(source_path, path)

                HydrusPaths.MakeFileWriteable(path)

        except Exception:

            # remember the failure so the delete step below is skipped
            export_failed = True

            QP.CallAfter(
                QW.QMessageBox.information, self, 'Information',
                'Encountered a problem while attempting to export file with index '
                + str(ordering_index + 1) + ':' + os.linesep * 2 +
                traceback.format_exc())

            break

        pauser.Pause()

    if delete_afterwards and not export_failed and not job_key.IsCancelled():

        QP.CallAfter(qt_update_label, 'deleting')

        delete_lock_for_archived_files = HG.client_controller.new_options.GetBoolean(
            'delete_lock_for_archived_files')

        if delete_lock_for_archived_files:

            # with the delete lock on, media reporting HasArchive() is left alone
            deletee_hashes = {
                media.GetHash()
                for (ordering_index, media, path) in to_do
                if not media.HasArchive()
            }

        else:

            deletee_hashes = {
                media.GetHash()
                for (ordering_index, media, path) in to_do
            }

        reason = 'Deleted after manual export to "{}".'.format(directory)

        for chunk_of_hashes in HydrusData.SplitListIntoChunks(
                deletee_hashes, 64):

            content_update = HydrusData.ContentUpdate(
                HC.CONTENT_TYPE_FILES,
                HC.CONTENT_UPDATE_DELETE,
                chunk_of_hashes,
                reason=reason)

            HG.client_controller.WriteSynchronous(
                'content_updates',
                {CC.LOCAL_FILE_SERVICE_KEY: [content_update]})

    job_key.DeleteVariable('popup_gauge_1')
    job_key.SetVariable('popup_text_1', 'Done!')

    job_key.Finish()

    job_key.Delete(5)

    QP.CallAfter(qt_update_label, 'done!')

    # leave 'done!' on the label for a moment before resetting it
    time.sleep(1)

    QP.CallAfter(qt_update_label, 'export')

    QP.CallAfter(qt_done, quit_afterwards)
def work_callable():
    """
    Add (or move) applicable_media to dest_service_key in blocks of 64.

    For every block, media that dest_service_key lists as deleted is
    undeleted and the rest is added with the current time; when action is
    HC.CONTENT_UPDATE_MOVE the block is then deleted from source_service_key.
    A cancellable job key tracks progress and is only published as a popup
    when there is more than one block's worth of media.
    """
    BLOCK_SIZE = 64

    is_move = action == HC.CONTENT_UPDATE_MOVE

    job_key = ClientThreading.JobKey( cancellable = True )

    if is_move:

        title = 'moving files'

    else:

        title = 'adding files'


    job_key.SetStatusTitle( title )

    num_to_do = len( applicable_media )

    if num_to_do > BLOCK_SIZE:

        HG.client_controller.pub( 'message', job_key )


    pauser = HydrusData.BigJobPauser()

    now = HydrusData.GetNow()

    media_blocks = HydrusData.SplitListIntoChunks( applicable_media, BLOCK_SIZE )

    for ( block_index, media_block ) in enumerate( media_blocks ):

        if job_key.IsCancelled():

            break


        num_done = block_index * BLOCK_SIZE

        job_key.SetVariable( 'popup_text_1', HydrusData.ConvertValueRangeToPrettyString( num_done, num_to_do ) )
        job_key.SetVariable( 'popup_gauge_1', ( num_done, num_to_do ) )

        dest_updates = []
        hashes_to_undelete = set()

        for media in media_block:

            if dest_service_key in media.GetLocationsManager().GetDeleted():

                hashes_to_undelete.add( media.GetHash() )

                continue


            add_row = ( media.GetMediaResult().GetFileInfoManager(), now )

            dest_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_ADD, add_row ) )


        # all undeletes of the block travel in a single update, after the adds
        if len( hashes_to_undelete ) > 0:

            dest_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_UNDELETE, hashes_to_undelete ) )


        HG.client_controller.WriteSynchronous( 'content_updates', { dest_service_key : dest_updates } )

        if is_move:

            moved_hashes = [ media.GetHash() for media in media_block ]

            source_updates = [ HydrusData.ContentUpdate( HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_DELETE, moved_hashes, reason = 'Moved to {}'.format( dest_service_name ) ) ]

            HG.client_controller.WriteSynchronous( 'content_updates', { source_service_key : source_updates } )


        pauser.Pause()


    job_key.Delete()
def do_it(directory, neighbouring_txt_tag_service_keys, delete_afterwards,
          export_symlinks, quit_afterwards):
    """
    Export every ( ordering_index, media ) entry of to_do into directory.

    Progress is reported through qt_update_label (scheduled with
    QP.CallAfter). For each entry the destination path comes from
    self._GetPath, its directory is created, an optional '.txt' tag sidecar is
    written when export_tag_txts is set, and the file is either symlinked or
    mirrored to the destination path.

    directory: export root; every destination path must start with it.
    neighbouring_txt_tag_service_keys: tag services whose current tags go into
        the sidecar file.
    delete_afterwards: delete the exported files from CC.LOCAL_FILE_SERVICE_KEY
        once the export has completed. Skipped when an export step failed, so
        files that never reached the export directory are not deleted.
    export_symlinks: create symlinks instead of mirroring the files.
    quit_afterwards: passed through to qt_done at the end.

    The first failure is shown in a message box and stops the export.
    """
    pauser = HydrusData.BigJobPauser()

    export_failed = False

    for (index, (ordering_index, media)) in enumerate(to_do):

        try:

            QP.CallAfter(
                qt_update_label,
                HydrusData.ConvertValueRangeToPrettyString(
                    index + 1, num_to_do))

            file_hash = media.GetHash()
            mime = media.GetMime()

            path = self._GetPath(media)

            path = os.path.normpath(path)

            if not path.startswith(directory):

                raise Exception(
                    'It seems a destination path was above the main export directory! The file was "{}" and its destination path was "{}".'
                    .format(file_hash.hex(), path))

            path_dir = os.path.dirname(path)

            HydrusPaths.MakeSureDirectoryExists(path_dir)

            if export_tag_txts:

                tags_manager = media.GetTagsManager()

                tags = set()

                for service_key in neighbouring_txt_tag_service_keys:

                    current_tags = tags_manager.GetCurrent(
                        service_key,
                        ClientTags.TAG_DISPLAY_SIBLINGS_AND_PARENTS)

                    tags.update(current_tags)

                tags = sorted(tags)

                txt_path = path + '.txt'

                with open(txt_path, 'w', encoding='utf-8') as f:

                    # the file is in text mode, which already translates '\n' to the
                    # platform line ending; joining on os.linesep would give '\r\r\n'
                    # on Windows
                    f.write('\n'.join(tags))

            source_path = client_files_manager.GetFilePath(
                file_hash, mime, check_file_exists=False)

            if export_symlinks:

                os.symlink(source_path, path)

            else:

                HydrusPaths.MirrorFile(source_path, path)

                HydrusPaths.MakeFileWritable(path)

        except Exception:

            # remember the failure so the delete step below is skipped
            export_failed = True

            QP.CallAfter(
                QW.QMessageBox.information, self, 'Information',
                'Encountered a problem while attempting to export file with index '
                + str(ordering_index + 1) + ':' + os.linesep * 2 +
                traceback.format_exc())

            break

        pauser.Pause()

    if delete_afterwards and not export_failed:

        QP.CallAfter(qt_update_label, 'deleting')

        deletee_hashes = {
            media.GetHash()
            for (ordering_index, media) in to_do
        }

        reason = 'Deleted after manual export to "{}".'.format(directory)

        for chunk_of_hashes in HydrusData.SplitListIntoChunks(
                deletee_hashes, 64):

            content_update = HydrusData.ContentUpdate(
                HC.CONTENT_TYPE_FILES,
                HC.CONTENT_UPDATE_DELETE,
                chunk_of_hashes,
                reason=reason)

            HG.client_controller.WriteSynchronous(
                'content_updates',
                {CC.LOCAL_FILE_SERVICE_KEY: [content_update]})

    QP.CallAfter(qt_update_label, 'done!')

    # leave 'done!' on the label for a moment before resetting it
    time.sleep(1)

    QP.CallAfter(qt_update_label, 'export')

    QP.CallAfter(qt_done, quit_afterwards)