def GetBandwidthStringsAndGaugeTuples(self, bandwidth_tracker, threshold=600):
    """Build display rows for this context's bandwidth rules.

    Returns a list of ( string, ( usage, max_allowed ) ) tuples, one per rule,
    suitable for driving a text label plus a gauge.  Rules whose time window is
    at or below 'threshold' seconds, and rules with a zero allowance, are
    skipped.  A time_delta of None is treated as the monthly rule.

    NOTE(review): assumes self._rules holds ( bandwidth_type, time_delta,
    max_allowed ) tuples -- confirm against the class's rule storage.
    """
    with self._lock:
        rows = []
        rules_sorted = list(self._rules)

        # Sort by window length, with the monthly (None) rule first.
        def key(rule_tuple):
            (bandwidth_type, time_delta, max_allowed) = rule_tuple
            if time_delta is None:
                return -1
            else:
                return time_delta

        rules_sorted.sort(key=key)
        for (bandwidth_type, time_delta, max_allowed) in rules_sorted:
            # Short windows and unlimited (0) rules are not worth showing.
            time_is_less_than_threshold = time_delta is not None and time_delta <= threshold
            if time_is_less_than_threshold or max_allowed == 0:
                continue
            usage = bandwidth_tracker.GetUsage(bandwidth_type, time_delta)
            s = 'used '
            if bandwidth_type == HC.BANDWIDTH_TYPE_DATA:
                s += HydrusData.ConvertValueRangeToBytes(usage, max_allowed)
            elif bandwidth_type == HC.BANDWIDTH_TYPE_REQUESTS:
                s += HydrusData.ConvertValueRangeToPrettyString(usage, max_allowed) + ' requests'
            if time_delta is None:
                s += ' this month'
            else:
                s += ' in the past ' + HydrusData.TimeDeltaToPrettyTimeDelta(time_delta)
            rows.append((s, (usage, max_allowed)))
        return rows
def RegenerateSearchableSubtagMap(self, file_service_id, tag_service_id, status_hook=None):
    """Rebuild the subtag -> searchable-subtag map for one (file, tag) service pair.

    Clears the existing map table and re-derives every entry from the subtag
    ids listed in the FTS4 table.  Only subtags whose searchable form differs
    from the raw form get a row.  'status_hook', if given, receives progress
    strings; the splash screen subtext is updated per chunk regardless.
    """
    subtags_fts4_table_name = self.GetSubtagsFTS4TableName(file_service_id, tag_service_id)
    subtags_searchable_map_table_name = self.GetSubtagsSearchableMapTableName(file_service_id, tag_service_id)
    self._Execute('DELETE FROM {};'.format(subtags_searchable_map_table_name))
    query = 'SELECT docid FROM {};'.format(subtags_fts4_table_name)
    BLOCK_SIZE = 10000
    for (group_of_subtag_ids, num_done, num_to_do) in HydrusDB.ReadLargeIdQueryInSeparateChunks(self._c, query, BLOCK_SIZE):
        for subtag_id in group_of_subtag_ids:
            result = self._Execute('SELECT subtag FROM subtags WHERE subtag_id = ?;', (subtag_id, )).fetchone()
            if result is None:
                # Orphaned docid with no master subtag row -- nothing to map.
                continue
            (subtag, ) = result
            searchable_subtag = ClientSearch.ConvertSubtagToSearchable(subtag)
            if searchable_subtag != subtag:
                searchable_subtag_id = self.modules_tags.GetSubtagId(searchable_subtag)
                self._Execute('INSERT OR IGNORE INTO {} ( subtag_id, searchable_subtag_id ) VALUES ( ?, ? );'.format(subtags_searchable_map_table_name), (subtag_id, searchable_subtag_id))
        # Per-chunk progress report.
        message = HydrusData.ConvertValueRangeToPrettyString(num_done, num_to_do)
        HG.client_controller.frame_splash_status.SetSubtext(message)
        if status_hook is not None:
            status_hook(message)
def RegeneratePending(self, tag_service_id, status_hook=None):
    """Regenerate the combined-files pending-mapping counts for a tag service.

    Clears the old pending counts (keeping current counts), recounts distinct
    pending hashes per tag straight from the pending mappings table, writes
    the new counts, and then cascades to the combined-files display cache.
    'status_hook', if given, receives progress strings.
    """
    (current_mappings_table_name, deleted_mappings_table_name, pending_mappings_table_name, petitioned_mappings_table_name) = ClientDBMappingsStorage.GenerateMappingsTableNames(tag_service_id)
    if status_hook is not None:
        message = 'clearing old combined display data'
        status_hook(message)
    all_pending_storage_tag_ids = self._STS(self._Execute('SELECT DISTINCT tag_id FROM {};'.format(pending_mappings_table_name)))
    # keep_current=True: only the pending side of the counts is being rebuilt.
    self.modules_mappings_counts.ClearCounts(ClientTags.TAG_DISPLAY_STORAGE, self.modules_services.combined_file_service_id, tag_service_id, keep_current=True)
    counts_cache_changes = []
    num_to_do = len(all_pending_storage_tag_ids)
    for (i, storage_tag_id) in enumerate(all_pending_storage_tag_ids):
        if i % 100 == 0 and status_hook is not None:
            message = 'regenerating pending tags {}'.format(HydrusData.ConvertValueRangeToPrettyString(i + 1, num_to_do))
            status_hook(message)
        (pending_delta, ) = self._Execute('SELECT COUNT( DISTINCT hash_id ) FROM {} WHERE tag_id = ?;'.format(pending_mappings_table_name), (storage_tag_id, )).fetchone()
        # ( tag_id, current_delta, pending_delta )
        counts_cache_changes.append((storage_tag_id, 0, pending_delta))
    self.modules_mappings_counts_update.AddCounts(ClientTags.TAG_DISPLAY_STORAGE, self.modules_services.combined_file_service_id, tag_service_id, counts_cache_changes)
    self.modules_mappings_cache_combined_files_display.RegeneratePending(tag_service_id, status_hook=status_hook)
def do_it( urls ):
    """Open each url in the web browser, one per second.

    When more than five urls are given, a pausable/cancellable popup job is
    published so the user can watch progress and bail out.  The job key is
    always finished and deleted, even on early return.
    """
    job_key = None
    num_urls = len( urls )
    if num_urls > 5:
        job_key = ClientThreading.JobKey( pausable = True, cancellable = True )
        job_key.SetVariable( 'popup_title', 'Opening URLs' )
        HG.client_controller.pub( 'message', job_key )
    try:
        for ( i, url ) in enumerate( urls ):
            if job_key is not None:
                # Honour pause/cancel from the popup before each launch.
                ( i_paused, should_quit ) = job_key.WaitIfNeeded()
                if should_quit:
                    return
                job_key.SetVariable( 'popup_text_1', HydrusData.ConvertValueRangeToPrettyString( i + 1, num_urls ) )
                job_key.SetVariable( 'popup_gauge_1', ( i + 1, num_urls ) )
            ClientPaths.LaunchURLInWebBrowser( url )
            # Pace the launches so the browser is not flooded.
            time.sleep( 1 )
    finally:
        if job_key is not None:
            job_key.Finish()
            job_key.Delete( 1 )
def RegeneratePending( self, file_service_id, tag_service_id, status_hook = None ):
    """Regenerate the specific pending-mappings cache for a (file, tag) service pair.

    Empties the specific pending cache table, refills it per tag from the raw
    pending mappings (limited to files current in the file domain), records
    the resulting pending count deltas, and then cascades to the specific
    display cache.  'status_hook', if given, receives progress strings.
    """
    ( current_mappings_table_name, deleted_mappings_table_name, pending_mappings_table_name, petitioned_mappings_table_name ) = ClientDBMappingsStorage.GenerateMappingsTableNames( tag_service_id )
    ( cache_current_mappings_table_name, cache_deleted_mappings_table_name, cache_pending_mappings_table_name ) = ClientDBMappingsStorage.GenerateSpecificMappingsCacheTableNames( file_service_id, tag_service_id )
    if status_hook is not None:
        message = 'clearing old specific data'
        status_hook( message )
    all_pending_storage_tag_ids = self._STS( self._Execute( 'SELECT DISTINCT tag_id FROM {};'.format( pending_mappings_table_name ) ) )
    # keep_current=True: only the pending side of the counts is being rebuilt.
    self.modules_mappings_counts.ClearCounts( ClientTags.TAG_DISPLAY_STORAGE, file_service_id, tag_service_id, keep_current = True )
    self._Execute( 'DELETE FROM {};'.format( cache_pending_mappings_table_name ) )
    counts_cache_changes = []
    num_to_do = len( all_pending_storage_tag_ids )
    # Join that restricts the pending mappings to files current in this file domain.
    select_table_join = self.modules_files_storage.GetTableJoinLimitedByFileDomain( file_service_id, pending_mappings_table_name, HC.CONTENT_STATUS_CURRENT )
    for ( i, storage_tag_id ) in enumerate( all_pending_storage_tag_ids ):
        if i % 100 == 0 and status_hook is not None:
            message = 'regenerating pending tags {}'.format( HydrusData.ConvertValueRangeToPrettyString( i + 1, num_to_do ) )
            status_hook( message )
        self._Execute( 'INSERT OR IGNORE INTO {} ( tag_id, hash_id ) SELECT tag_id, hash_id FROM {} WHERE tag_id = ?;'.format( cache_pending_mappings_table_name, select_table_join ), ( storage_tag_id, ) )
        pending_delta = self._GetRowCount()
        # ( tag_id, current_delta, pending_delta )
        counts_cache_changes.append( ( storage_tag_id, 0, pending_delta ) )
    self.modules_mappings_counts_update.AddCounts( ClientTags.TAG_DISPLAY_STORAGE, file_service_id, tag_service_id, counts_cache_changes )
    self.modules_mappings_cache_specific_display.RegeneratePending( file_service_id, tag_service_id, status_hook = status_hook )
def Repopulate(self):
    """Clear and refill this cache from all current local files.

    Fetches every hash id in the combined local file service and feeds them
    into the cache in 10k blocks, updating the splash screen subtext with
    progress as it goes.
    """
    self.ClearCache()
    HG.client_controller.frame_splash_status.SetSubtext('reading local file data')
    local_hash_ids = self.modules_files_storage.GetCurrentHashIdsList(self.modules_services.combined_local_file_service_id)
    BLOCK_SIZE = 10000
    num_to_do = len(local_hash_ids)
    for (i, block_of_hash_ids) in enumerate(HydrusData.SplitListIntoChunks(local_hash_ids, BLOCK_SIZE)):
        # i * BLOCK_SIZE = number of ids completed before this block.
        HG.client_controller.frame_splash_status.SetSubtext('caching local file data {}'.format(HydrusData.ConvertValueRangeToPrettyString(i * BLOCK_SIZE, num_to_do)))
        self.AddHashIdsToCache(block_of_hash_ids)
def _Update(self):
    """Refresh the summary text, progress gauge, and button state from the file seed cache.

    With no cache set, everything is blanked and the cache button disabled;
    otherwise the widgets reflect the cache's ( num_done, num_to_do ) status.
    """
    if self._file_seed_cache is None:
        self._import_summary_st.clear()
        self._progress_st.clear()
        self._progress_gauge.SetRange(1)
        self._progress_gauge.SetValue(0)
        # Only toggle when needed, to avoid redundant Qt work.
        if self._file_seed_cache_button.isEnabled():
            self._file_seed_cache_button.setEnabled(False)
    else:
        file_seed_cache_status = self._file_seed_cache.GetStatus()
        (num_done, num_to_do) = file_seed_cache_status.GetValueRange()
        self._import_summary_st.setText(file_seed_cache_status.GetStatusText())
        if num_to_do == 0:
            self._progress_st.clear()
        else:
            self._progress_st.setText(HydrusData.ConvertValueRangeToPrettyString(num_done, num_to_do))
        self._progress_gauge.SetRange(num_to_do)
        self._progress_gauge.SetValue(num_done)
        if not self._file_seed_cache_button.isEnabled():
            self._file_seed_cache_button.setEnabled(True)
def THREADDownloadURLs( job_key, urls, title ):
    """Worker thread: download and import a list of raw file urls.

    Drives the given popup job key with per-url progress, tallies
    successful / redundant / deleted / failed outcomes, publishes imported
    files to the popup as it goes, and finishes with a summary line.
    Failures are logged, not raised.
    """
    job_key.SetVariable( 'popup_title', title )
    job_key.SetVariable( 'popup_text_1', 'initialising' )
    num_successful = 0
    num_redundant = 0
    num_deleted = 0
    num_failed = 0
    presentation_hashes = []
    presentation_hashes_fast = set()
    file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions( 'loud' )

    def network_job_factory( *args, **kwargs ):
        # These manual downloads bypass normal bandwidth rules.
        network_job = ClientNetworkingJobs.NetworkJob( *args, **kwargs )
        network_job.OverrideBandwidth()
        return network_job

    def status_hook( text ):
        # Popups are single-line; keep only the first line of any status text.
        if len( text ) > 0:
            text = text.splitlines()[0]
        job_key.SetVariable( 'popup_text_2', text )

    network_job_presentation_context_factory = GenerateMultiplePopupNetworkJobPresentationContextFactory( job_key )
    for ( i, url ) in enumerate( urls ):
        ( i_paused, should_quit ) = job_key.WaitIfNeeded()
        if should_quit:
            break
        job_key.SetVariable( 'popup_text_1', HydrusData.ConvertValueRangeToPrettyString( i + 1, len( urls ) ) )
        job_key.SetVariable( 'popup_gauge_1', ( i + 1, len( urls ) ) )
        file_seed = ClientImportFileSeeds.FileSeed( ClientImportFileSeeds.FILE_SEED_TYPE_URL, url )
        try:
            file_seed.DownloadAndImportRawFile( url, file_import_options, network_job_factory, network_job_presentation_context_factory, status_hook )
            status = file_seed.status
            if status in CC.SUCCESSFUL_IMPORT_STATES:
                if status == CC.STATUS_SUCCESSFUL_AND_NEW:
                    num_successful += 1
                elif status == CC.STATUS_SUCCESSFUL_BUT_REDUNDANT:
                    num_redundant += 1
                if file_seed.HasHash():
                    hash = file_seed.GetHash()
                    if hash not in presentation_hashes_fast:
                        presentation_hashes.append( hash )
                        presentation_hashes_fast.add( hash )
                    # Keep the popup's file button up to date as imports land.
                    if len( presentation_hashes ) > 0:
                        job_key.SetVariable( 'popup_files', ( presentation_hashes, 'downloads' ) )
            elif status == CC.STATUS_DELETED:
                num_deleted += 1
        except Exception as e:
            num_failed += 1
            HydrusData.Print( url + ' failed to import!' )
            HydrusData.PrintException( e )
        finally:
            # Clear the per-url status/network rows before the next url.
            job_key.DeleteVariable( 'popup_text_2' )
            job_key.DeleteVariable( 'popup_network_job' )
    # Final summary line, e.g. "3 successful, 1 already in db".
    text_components = []
    if num_successful > 0:
        text_components.append( HydrusData.ToHumanInt( num_successful ) + ' successful' )
    if num_redundant > 0:
        text_components.append( HydrusData.ToHumanInt( num_redundant ) + ' already in db' )
    if num_deleted > 0:
        text_components.append( HydrusData.ToHumanInt( num_deleted ) + ' deleted' )
    if num_failed > 0:
        text_components.append( HydrusData.ToHumanInt( num_failed ) + ' failed (errors written to log)' )
    job_key.SetVariable( 'popup_text_1', ', '.join( text_components ) )
    if len( presentation_hashes ) > 0:
        job_key.SetVariable( 'popup_files', ( presentation_hashes, 'downloads' ) )
    job_key.DeleteVariable( 'popup_gauge_1' )
    job_key.Finish()
def _ImportFiles(self, job_key):
    """Import pending files for this import folder until done, paused, or cancelled.

    Pulls STATUS_UNKNOWN seeds from the cache one at a time, imports each
    path, applies additional and filename-derived tags, tracks hashes to
    present, and periodically saves state and actions finished paths.
    Returns True if any seed was processed.
    """
    did_work = False
    # Persist our serialisable state at most every ten minutes.
    time_to_save = HydrusData.GetNow() + 600
    num_files_imported = 0
    presentation_hashes = []
    presentation_hashes_fast = set()
    i = 0
    num_total = len(self._file_seed_cache)
    num_total_unknown = self._file_seed_cache.GetFileSeedCount(CC.STATUS_UNKNOWN)
    num_total_done = num_total - num_total_unknown
    while True:
        file_seed = self._file_seed_cache.GetNextFileSeed(CC.STATUS_UNKNOWN)
        # Stop on: nothing left, global/local pause, thread shutdown, or user cancel.
        p1 = HC.options['pause_import_folders_sync'] or self._paused
        p2 = HydrusThreading.IsThreadShuttingDown()
        p3 = job_key.IsCancelled()
        if file_seed is None or p1 or p2 or p3:
            break
        did_work = True
        if HydrusData.TimeHasPassed(time_to_save):
            HG.client_controller.WriteSynchronous('serialisable', self)
            time_to_save = HydrusData.GetNow() + 600
        gauge_num_done = num_total_done + num_files_imported + 1
        job_key.SetVariable('popup_text_1', 'importing file ' + HydrusData.ConvertValueRangeToPrettyString(gauge_num_done, num_total))
        job_key.SetVariable('popup_gauge_1', (gauge_num_done, num_total))
        path = file_seed.file_seed_data
        file_seed.ImportPath(self._file_seed_cache, self._file_import_options, limited_mimes=self._mimes)
        if file_seed.status in CC.SUCCESSFUL_IMPORT_STATES:
            if file_seed.HasHash():
                hash = file_seed.GetHash()
                if self._tag_import_options.HasAdditionalTags():
                    media_result = HG.client_controller.Read('media_result', hash)
                    downloaded_tags = []
                    service_keys_to_content_updates = self._tag_import_options.GetServiceKeysToContentUpdates(file_seed.status, media_result, downloaded_tags)  # additional tags
                    if len(service_keys_to_content_updates) > 0:
                        HG.client_controller.WriteSynchronous('content_updates', service_keys_to_content_updates)
                # Tags parsed out of the filename, per configured tag service.
                service_keys_to_tags = ClientTags.ServiceKeysToTags()
                for (tag_service_key, filename_tagging_options) in list(self._tag_service_keys_to_filename_tagging_options.items()):
                    if not HG.client_controller.services_manager.ServiceExists(tag_service_key):
                        continue
                    try:
                        tags = filename_tagging_options.GetTags(tag_service_key, path)
                        if len(tags) > 0:
                            service_keys_to_tags[tag_service_key] = tags
                    except Exception as e:
                        HydrusData.ShowText('Trying to parse filename tags in the import folder "' + self._name + '" threw an error!')
                        HydrusData.ShowException(e)
                if len(service_keys_to_tags) > 0:
                    service_keys_to_content_updates = ClientData.ConvertServiceKeysToTagsToServiceKeysToContentUpdates({hash}, service_keys_to_tags)
                    HG.client_controller.WriteSynchronous('content_updates', service_keys_to_content_updates)
                num_files_imported += 1
                if hash not in presentation_hashes_fast:
                    if file_seed.ShouldPresent(self._file_import_options):
                        presentation_hashes.append(hash)
                        presentation_hashes_fast.add(hash)
        elif file_seed.status == CC.STATUS_ERROR:
            HydrusData.Print('A file failed to import from import folder ' + self._name + ':' + path)
        i += 1
        # Action (move/delete/etc.) completed source paths every ten files.
        if i % 10 == 0:
            self._ActionPaths()
    if num_files_imported > 0:
        HydrusData.Print('Import folder ' + self._name + ' imported ' + HydrusData.ToHumanInt(num_files_imported) + ' files.')
        if len(presentation_hashes) > 0:
            ClientImporting.PublishPresentationHashes(self._name, presentation_hashes, self._publish_files_to_popup_button, self._publish_files_to_page)
    self._ActionPaths()
    return did_work
def MainLoop(self):
    """Background worker: download queued file hashes from remote services.

    Loops until thread/view shutdown, draining self._pending_hashes into a
    working set.  For each hash it tries, in random order, every service the
    file is current in: file repositories are downloaded to a temp path and
    imported; IPFS files are imported via their multihash.  Progress is shown
    through a cancellable popup job key that is created lazily per run and
    cleaned up when the run's set empties.  Errors are shown, not raised.
    """
    hashes_still_to_download_in_this_run = set()
    total_hashes_in_this_run = 0
    total_successful_hashes_in_this_run = 0
    while not (HydrusThreading.IsThreadShuttingDown() or self._shutting_down or HG.view_shutdown):
        with self._lock:
            if len(self._pending_hashes) > 0:
                if total_hashes_in_this_run == 0:
                    # First work of a new run -- set up a popup, published after
                    # a 2s delay so short runs never show one.
                    job_key = ClientThreading.JobKey(cancellable=True)
                    job_key.SetStatusTitle('downloading')
                    job_key.SetVariable('popup_text_1', 'initialising downloader')
                    job_key_pub_job = self._controller.CallLater(2.0, self._controller.pub, 'message', job_key)
                num_before = len(hashes_still_to_download_in_this_run)
                hashes_still_to_download_in_this_run.update(self._pending_hashes)
                num_after = len(hashes_still_to_download_in_this_run)
                total_hashes_in_this_run += num_after - num_before
                self._pending_hashes = set()
        if len(hashes_still_to_download_in_this_run) == 0:
            # Idle: reset run totals and wait for new work.
            total_hashes_in_this_run = 0
            total_successful_hashes_in_this_run = 0
            self._new_files_event.wait(5)
            self._new_files_event.clear()
            continue
        if job_key.IsCancelled():
            hashes_still_to_download_in_this_run = set()
            continue
        # Pick a random hash to spread load across services.
        # (was random.sample( set, 1 ) -- deprecated in 3.9, TypeError in 3.11+)
        hash = random.choice(list(hashes_still_to_download_in_this_run))
        hashes_still_to_download_in_this_run.discard(hash)
        total_done = total_hashes_in_this_run - len(hashes_still_to_download_in_this_run)
        job_key.SetVariable('popup_text_1', 'downloading files from remote services: {}'.format(HydrusData.ConvertValueRangeToPrettyString(total_done, total_hashes_in_this_run)))
        job_key.SetVariable('popup_gauge_1', (total_done, total_hashes_in_this_run))
        try:
            errors_occured = []
            file_successful = False
            media_result = self._controller.Read('media_result', hash)
            service_keys = list(media_result.GetLocationsManager().GetCurrent())
            random.shuffle(service_keys)
            if CC.COMBINED_LOCAL_FILE_SERVICE_KEY in service_keys:
                # Already local -- nothing to download.
                total_successful_hashes_in_this_run += 1
                continue
            for service_key in service_keys:
                try:
                    service = self._controller.services_manager.GetService(service_key)
                except Exception:
                    # Service no longer exists; try the next one.
                    continue
                try:
                    if service.GetServiceType() == HC.FILE_REPOSITORY:
                        file_repository = service
                        if file_repository.IsFunctional():
                            (os_file_handle, temp_path) = HydrusTemp.GetTempPath()
                            try:
                                file_repository.Request(HC.GET, 'file', {'hash': hash}, temp_path=temp_path)
                                exclude_deleted = False  # this is the important part here
                                do_not_check_known_urls_before_importing = False
                                do_not_check_hashes_before_importing = False
                                allow_decompression_bombs = True
                                min_size = None
                                max_size = None
                                max_gif_size = None
                                min_resolution = None
                                max_resolution = None
                                automatic_archive = False
                                associate_primary_urls = True
                                associate_source_urls = True
                                file_import_options = FileImportOptions.FileImportOptions()
                                file_import_options.SetPreImportOptions(exclude_deleted, do_not_check_known_urls_before_importing, do_not_check_hashes_before_importing, allow_decompression_bombs, min_size, max_size, max_gif_size, min_resolution, max_resolution)
                                file_import_options.SetPostImportOptions(automatic_archive, associate_primary_urls, associate_source_urls)
                                file_import_job = ClientImportFiles.FileImportJob(temp_path, file_import_options)
                                file_import_job.DoWork()
                                file_successful = True
                                break
                            finally:
                                HydrusTemp.CleanUpTempPath(os_file_handle, temp_path)
                    elif service.GetServiceType() == HC.IPFS:
                        multihashes = HG.client_controller.Read('service_filenames', service_key, {hash})
                        if len(multihashes) > 0:
                            multihash = multihashes[0]
                            service.ImportFile(multihash, silent=True)
                            file_successful = True
                            break
                except Exception as e:
                    errors_occured.append(e)
            if file_successful:
                total_successful_hashes_in_this_run += 1
            if len(errors_occured) > 0:
                if not file_successful:
                    raise errors_occured[0]
        except Exception as e:
            HydrusData.ShowException(e)
            # BUG FIX: was `hashes_still_to_download_in_this_run = 0`, an int,
            # which made the len() call in the finally below raise TypeError.
            hashes_still_to_download_in_this_run = set()
        finally:
            if len(hashes_still_to_download_in_this_run) == 0:
                # Run complete -- tidy the popup and report the total.
                job_key.DeleteVariable('popup_text_1')
                job_key.DeleteVariable('popup_gauge_1')
                if total_successful_hashes_in_this_run > 0:
                    job_key.SetVariable('popup_text_1', HydrusData.ToHumanInt(total_successful_hashes_in_this_run) + ' files downloaded')
                job_key_pub_job.Cancel()
                job_key.Finish()
                job_key.Delete(1)
def THREADSearchPotentials(self):
    """Worker thread: search similar-files metadata for potential duplicate pairs.

    Bails out immediately if every file is already searched at the configured
    distance.  Otherwise runs the search in 0.5s DB work slices under a
    cancellable popup, keeping a local estimate of progress and refreshing it
    from the DB when the estimate overruns the known total.  Always clears
    the in-progress flag and renotifies the numbers on exit.
    """
    try:
        search_distance = HG.client_controller.new_options.GetInteger('similar_files_duplicate_pairs_search_distance')
        with self._lock:
            if self._similar_files_maintenance_status is None:
                return
            searched_distances_to_count = self._similar_files_maintenance_status
            total_num_files = sum(searched_distances_to_count.values())
            # Files already searched at >= the configured distance count as done.
            num_searched = sum((count for (value, count) in searched_distances_to_count.items() if value is not None and value >= search_distance))
            all_files_searched = num_searched >= total_num_files
            if all_files_searched:
                return  # no work to do
        num_searched_estimate = num_searched
        HG.client_controller.pub('new_similar_files_maintenance_numbers')
        job_key = ClientThreading.JobKey(cancellable=True)
        job_key.SetStatusTitle('searching for potential duplicates')
        HG.client_controller.pub('message', job_key)
        still_work_to_do = True
        while still_work_to_do:
            # Re-read each pass so a settings change takes effect mid-run.
            search_distance = HG.client_controller.new_options.GetInteger('similar_files_duplicate_pairs_search_distance')
            start_time = HydrusData.GetNowPrecise()
            (still_work_to_do, num_done) = HG.client_controller.WriteSynchronous('maintain_similar_files_search_for_potential_duplicates', search_distance, maintenance_mode=HC.MAINTENANCE_FORCED, job_key=job_key, work_time_float=0.5)
            time_it_took = HydrusData.GetNowPrecise() - start_time
            num_searched_estimate += num_done
            if num_searched_estimate > total_num_files:
                # Estimate overran the known total -- refresh from the DB.
                similar_files_maintenance_status = HG.client_controller.Read('similar_files_maintenance_status')
                if similar_files_maintenance_status is None:
                    break
                with self._lock:
                    self._similar_files_maintenance_status = similar_files_maintenance_status
                    searched_distances_to_count = self._similar_files_maintenance_status
                    total_num_files = max(num_searched_estimate, sum(searched_distances_to_count.values()))
            text = 'searching: {}'.format(HydrusData.ConvertValueRangeToPrettyString(num_searched_estimate, total_num_files))
            job_key.SetVariable('popup_text_1', text)
            job_key.SetVariable('popup_gauge_1', (num_searched_estimate, total_num_files))
            if job_key.IsCancelled() or HG.model_shutdown:
                break
            # Sleep roughly as long as the work took, capped at 5s, to stay polite.
            time.sleep(min(5, time_it_took))  # ideally 0.5s, but potentially longer
        job_key.Delete()
    finally:
        with self._lock:
            self._currently_doing_potentials_search = False
        self.RefreshMaintenanceNumbers()
        self.NotifyNewPotentialsSearchNumbers()
def RegeneratePending(self, file_service_id, tag_service_id, status_hook=None):
    """Regenerate the specific display pending-mappings cache for a (file, tag) service pair.

    Maps pending storage tags through sibling/parent implications to display
    tags, empties the display pending cache, and refills it per display tag
    from the storage pending cache (single-implication tags via a direct
    insert-select, multi-implication tags via a temp id table), recording the
    pending count delta for each.  'status_hook', if given, receives progress.
    """
    (cache_current_mappings_table_name, cache_deleted_mappings_table_name, cache_pending_mappings_table_name) = ClientDBMappingsStorage.GenerateSpecificMappingsCacheTableNames(file_service_id, tag_service_id)
    (cache_display_current_mappings_table_name, cache_display_pending_mappings_table_name) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames(file_service_id, tag_service_id)
    if status_hook is not None:
        message = 'clearing old specific display data'
        status_hook(message)
    all_pending_storage_tag_ids = self._STS(self._Execute('SELECT DISTINCT tag_id FROM {};'.format(cache_pending_mappings_table_name)))
    # Storage tags imply display tags (siblings/parents); collect the display side.
    storage_tag_ids_to_display_tag_ids = self.modules_tag_display.GetTagsToImplies(ClientTags.TAG_DISPLAY_ACTUAL, tag_service_id, all_pending_storage_tag_ids)
    all_pending_display_tag_ids = set(itertools.chain.from_iterable(storage_tag_ids_to_display_tag_ids.values()))
    # Free the potentially large intermediate collections early.
    del all_pending_storage_tag_ids
    del storage_tag_ids_to_display_tag_ids
    # keep_current=True: only the pending side of the counts is being rebuilt.
    self.modules_mappings_counts.ClearCounts(ClientTags.TAG_DISPLAY_ACTUAL, file_service_id, tag_service_id, keep_current=True)
    self._Execute('DELETE FROM {};'.format(cache_display_pending_mappings_table_name))
    all_pending_display_tag_ids_to_implied_by_storage_tag_ids = self.modules_tag_display.GetTagsToImpliedBy(ClientTags.TAG_DISPLAY_ACTUAL, tag_service_id, all_pending_display_tag_ids, tags_are_ideal=True)
    counts_cache_changes = []
    num_to_do = len(all_pending_display_tag_ids_to_implied_by_storage_tag_ids)
    for (i, (display_tag_id, storage_tag_ids)) in enumerate(all_pending_display_tag_ids_to_implied_by_storage_tag_ids.items()):
        if i % 100 == 0 and status_hook is not None:
            message = 'regenerating pending tags {}'.format(HydrusData.ConvertValueRangeToPrettyString(i + 1, num_to_do))
            status_hook(message)
        if len(storage_tag_ids) == 1:
            # Single implying tag: copy its rows directly.
            (storage_tag_id, ) = storage_tag_ids
            self._Execute('INSERT OR IGNORE INTO {} ( tag_id, hash_id ) SELECT ?, hash_id FROM {} WHERE tag_id = ?;'.format(cache_display_pending_mappings_table_name, cache_pending_mappings_table_name), (display_tag_id, storage_tag_id))
            pending_delta = self._GetRowCount()
        else:
            with self._MakeTemporaryIntegerTable(storage_tag_ids, 'tag_id') as temp_tag_ids_table_name:
                # temp tags to mappings merged
                self._Execute('INSERT OR IGNORE INTO {} ( tag_id, hash_id ) SELECT DISTINCT ?, hash_id FROM {} CROSS JOIN {} USING ( tag_id );'.format(cache_display_pending_mappings_table_name, temp_tag_ids_table_name, cache_pending_mappings_table_name), (display_tag_id, ))
                pending_delta = self._GetRowCount()
        # ( tag_id, current_delta, pending_delta )
        counts_cache_changes.append((display_tag_id, 0, pending_delta))
    self.modules_mappings_counts_update.AddCounts(ClientTags.TAG_DISPLAY_ACTUAL, file_service_id, tag_service_id, counts_cache_changes)
def MaintainTree(self, maintenance_mode=HC.MAINTENANCE_FORCED, job_key=None, stop_time=None):
    """Rebalance the similar-files VP-tree by regenerating flagged branches.

    Repeatedly picks the flagged branch with the largest population and
    regenerates it until the shape_maintenance_branch_regen queue is empty,
    the job is cancelled, or ShouldStopThisWork says stop.  Creates its own
    job key if none is given, publishing it as a modal message after 5s of
    work.  The job key is always finished and cleaned up.
    """
    time_started = HydrusData.GetNow()
    pub_job_key = False
    job_key_pubbed = False
    if job_key is None:
        job_key = ClientThreading.JobKey(cancellable=True)
        pub_job_key = True
    try:
        job_key.SetVariable('popup_title', 'similar files metadata maintenance')
        rebalance_phash_ids = self._STL(self._c.execute('SELECT phash_id FROM shape_maintenance_branch_regen;'))
        num_to_do = len(rebalance_phash_ids)
        while len(rebalance_phash_ids) > 0:
            # Only surface the popup once the work has run for 5+ seconds.
            if pub_job_key and not job_key_pubbed and HydrusData.TimeHasPassed(time_started + 5):
                HG.client_controller.pub('modal_message', job_key)
                job_key_pubbed = True
            (i_paused, should_quit) = job_key.WaitIfNeeded()
            should_stop = HG.client_controller.ShouldStopThisWork(maintenance_mode, stop_time=stop_time)
            if should_quit or should_stop:
                return
            num_done = num_to_do - len(rebalance_phash_ids)
            text = 'rebalancing similar file metadata - ' + HydrusData.ConvertValueRangeToPrettyString(num_done, num_to_do)
            HG.client_controller.frame_splash_status.SetSubtext(text)
            job_key.SetVariable('popup_text_1', text)
            job_key.SetVariable('popup_gauge_1', (num_done, num_to_do))
            with HydrusDB.TemporaryIntegerTable(self._c, rebalance_phash_ids, 'phash_id') as temp_table_name:
                # temp phashes to tree
                # Pick the flagged node with the biggest subtree to regenerate first.
                (biggest_phash_id, ) = self._c.execute('SELECT phash_id FROM {} CROSS JOIN shape_vptree USING ( phash_id ) ORDER BY inner_population + outer_population DESC;'.format(temp_table_name)).fetchone()
            self._RegenerateBranch(job_key, biggest_phash_id)
            # Regenerating a branch clears its ids from the regen queue; re-read.
            rebalance_phash_ids = self._STL(self._c.execute('SELECT phash_id FROM shape_maintenance_branch_regen;'))
    finally:
        job_key.SetVariable('popup_text_1', 'done!')
        job_key.DeleteVariable('popup_gauge_1')
        job_key.DeleteVariable('popup_text_2')  # used in the regenbranch call
        job_key.Finish()
        job_key.Delete(5)
def _GenerateBranch(self, job_key, parent_id, phash_id, phash, children):
    """Build a VP-tree branch rooted at phash_id and commit it in one batch.

    Iteratively (breadth-first, via a deque) partitions each node's children
    into inner/outer sets around the median Hamming distance, chooses the
    radius so the smaller side absorbs the ties, picks root nodes for the
    two sides, and accumulates rows for a single executemany insert into
    shape_vptree.  Progress is reported on the job key's popup_text_2.
    """
    process_queue = collections.deque()
    process_queue.append((parent_id, phash_id, phash, children))
    insert_rows = []
    num_done = 0
    num_to_do = len(children) + 1
    while len(process_queue) > 0:
        job_key.SetVariable('popup_text_2', 'generating new branch -- ' + HydrusData.ConvertValueRangeToPrettyString(num_done, num_to_do))
        (parent_id, phash_id, phash, children) = process_queue.popleft()
        if len(children) == 0:
            # Leaf node.
            inner_id = None
            inner_population = 0
            outer_id = None
            outer_population = 0
            radius = None
        else:
            # Sort children by Hamming distance from this node's phash.
            children = sorted(((HydrusData.Get64BitHammingDistance(phash, child_phash), child_id, child_phash) for (child_id, child_phash) in children))
            median_index = len(children) // 2
            median_radius = children[median_index][0]
            inner_children = [(child_id, child_phash) for (distance, child_id, child_phash) in children if distance < median_radius]
            radius_children = [(child_id, child_phash) for (distance, child_id, child_phash) in children if distance == median_radius]
            outer_children = [(child_id, child_phash) for (distance, child_id, child_phash) in children if distance > median_radius]
            # Give the median ties to whichever side is currently smaller,
            # adjusting the radius so inner is always 'distance <= radius'.
            if len(inner_children) <= len(outer_children):
                radius = median_radius
                inner_children.extend(radius_children)
            else:
                radius = median_radius - 1
                outer_children.extend(radius_children)
            inner_population = len(inner_children)
            outer_population = len(outer_children)
            (inner_id, inner_phash) = self._PopBestRootNode(inner_children)  #HydrusData.MedianPop( inner_children )
            if len(outer_children) == 0:
                outer_id = None
            else:
                (outer_id, outer_phash) = self._PopBestRootNode(outer_children)  #HydrusData.MedianPop( outer_children )
        insert_rows.append((phash_id, parent_id, radius, inner_id, inner_population, outer_id, outer_population))
        if inner_id is not None:
            process_queue.append((phash_id, inner_id, inner_phash, inner_children))
        if outer_id is not None:
            process_queue.append((phash_id, outer_id, outer_phash, outer_children))
        num_done += 1
    job_key.SetVariable('popup_text_2', 'branch constructed, now committing')
    self._c.executemany('INSERT OR REPLACE INTO shape_vptree ( phash_id, parent_id, radius, inner_id, inner_population, outer_id, outer_population ) VALUES ( ?, ?, ?, ?, ?, ?, ? );', insert_rows)
def do_it(directory, neighbouring_txt_tag_service_keys, delete_afterwards, export_symlinks, quit_afterwards):
    """Worker closure: export the queued media files to 'directory'.

    For each ( ordering_index, media ) in the enclosing 'to_do' list, writes
    the file (or a symlink), optionally a neighbouring .txt of tags, then
    optionally deletes the originals in chunks.  UI updates go through
    QP.CallAfter.  NOTE(review): relies on closure variables from the
    enclosing scope -- to_do, num_to_do, export_tag_txts,
    client_files_manager, qt_update_label, qt_done, self.
    """
    pauser = HydrusData.BigJobPauser()
    for (index, (ordering_index, media)) in enumerate(to_do):
        try:
            QP.CallAfter(qt_update_label, HydrusData.ConvertValueRangeToPrettyString(index + 1, num_to_do))
            hash = media.GetHash()
            mime = media.GetMime()
            path = self._GetPath(media)
            path = os.path.normpath(path)
            # Safety check: never write outside the chosen export directory.
            if not path.startswith(directory):
                raise Exception('It seems a destination path was above the main export directory! The file was "{}" and its destination path was "{}".'.format(hash.hex(), path))
            path_dir = os.path.dirname(path)
            HydrusPaths.MakeSureDirectoryExists(path_dir)
            if export_tag_txts:
                # Write the file's tags (from the selected services) to a sidecar .txt.
                tags_manager = media.GetTagsManager()
                tags = set()
                for service_key in neighbouring_txt_tag_service_keys:
                    current_tags = tags_manager.GetCurrent(service_key, ClientTags.TAG_DISPLAY_SIBLINGS_AND_PARENTS)
                    tags.update(current_tags)
                tags = sorted(tags)
                txt_path = path + '.txt'
                with open(txt_path, 'w', encoding='utf-8') as f:
                    f.write(os.linesep.join(tags))
            source_path = client_files_manager.GetFilePath(hash, mime, check_file_exists=False)
            if export_symlinks:
                os.symlink(source_path, path)
            else:
                HydrusPaths.MirrorFile(source_path, path)
                HydrusPaths.MakeFileWritable(path)
        except:
            QP.CallAfter(QW.QMessageBox.information, self, 'Information', 'Encountered a problem while attempting to export file with index ' + str(ordering_index + 1) + ':' + os.linesep * 2 + traceback.format_exc())
            break
        pauser.Pause()
    if delete_afterwards:
        QP.CallAfter(qt_update_label, 'deleting')
        deletee_hashes = {media.GetHash() for (ordering_index, media) in to_do}
        # Delete in chunks of 64 to keep each content update small.
        chunks_of_hashes = HydrusData.SplitListIntoChunks(deletee_hashes, 64)
        reason = 'Deleted after manual export to "{}".'.format(directory)
        content_updates = [HydrusData.ContentUpdate(HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_DELETE, chunk_of_hashes, reason=reason) for chunk_of_hashes in chunks_of_hashes]
        for content_update in content_updates:
            HG.client_controller.WriteSynchronous('content_updates', {CC.LOCAL_FILE_SERVICE_KEY: [content_update]})
    QP.CallAfter(qt_update_label, 'done!')
    time.sleep(1)
    QP.CallAfter(qt_update_label, 'export')
    QP.CallAfter(qt_done, quit_afterwards)
def work_callable():
    """Worker closure: add (or move) the applicable media to the destination file service.

    Processes 'applicable_media' in blocks of 64: previously-deleted files get
    an UNDELETE, others an ADD; on a MOVE action the block is then deleted
    from the source service with a reason.  A popup job key is published only
    for larger jobs and allows cancellation between blocks.
    NOTE(review): relies on closure variables -- applicable_media, action,
    dest_service_key, dest_service_name, source_service_key.
    """
    job_key = ClientThreading.JobKey( cancellable = True )
    title = 'moving files' if action == HC.CONTENT_UPDATE_MOVE else 'adding files'
    job_key.SetStatusTitle( title )
    BLOCK_SIZE = 64
    # Only bother the user with a popup when there is more than one block.
    if len( applicable_media ) > BLOCK_SIZE:
        HG.client_controller.pub( 'message', job_key )
    pauser = HydrusData.BigJobPauser()
    num_to_do = len( applicable_media )
    now = HydrusData.GetNow()
    for ( i, block_of_media ) in enumerate( HydrusData.SplitListIntoChunks( applicable_media, BLOCK_SIZE ) ):
        if job_key.IsCancelled():
            break
        job_key.SetVariable( 'popup_text_1', HydrusData.ConvertValueRangeToPrettyString( i * BLOCK_SIZE, num_to_do ) )
        job_key.SetVariable( 'popup_gauge_1', ( i * BLOCK_SIZE, num_to_do ) )
        content_updates = []
        undelete_hashes = set()
        for m in block_of_media:
            # Files the destination has deleted before are undeleted rather than re-added.
            if dest_service_key in m.GetLocationsManager().GetDeleted():
                undelete_hashes.add( m.GetHash() )
            else:
                content_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_ADD, ( m.GetMediaResult().GetFileInfoManager(), now ) ) )
        if len( undelete_hashes ) > 0:
            content_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_UNDELETE, undelete_hashes ) )
        HG.client_controller.WriteSynchronous( 'content_updates', { dest_service_key : content_updates } )
        if action == HC.CONTENT_UPDATE_MOVE:
            # A move also deletes the block from the source service.
            block_of_hashes = [ m.GetHash() for m in block_of_media ]
            content_updates = [ HydrusData.ContentUpdate( HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_DELETE, block_of_hashes, reason = 'Moved to {}'.format( dest_service_name ) ) ]
            HG.client_controller.WriteSynchronous( 'content_updates', { source_service_key : content_updates } )
        pauser.Pause()
    job_key.Delete()
def do_it(directory, neighbouring_txt_tag_service_keys, delete_afterwards, export_symlinks, quit_afterwards):
    """Worker closure: export pre-pathed media files to 'directory' under a cancellable popup job.

    For each ( ordering_index, media, path ) in the enclosing 'to_do' list,
    writes the file (or a symlink) and optionally a neighbouring .txt of
    tags, then optionally deletes the originals (honouring the archived-file
    delete lock).  UI updates go through QP.CallAfter.  NOTE(review): relies
    on closure variables -- to_do, num_to_do, export_tag_txts,
    client_files_manager, qt_update_label, qt_done, self.
    """
    job_key = ClientThreading.JobKey(cancellable=True)
    job_key.SetStatusTitle('file export')
    HG.client_controller.pub('message', job_key)
    pauser = HydrusData.BigJobPauser()
    for (index, (ordering_index, media, path)) in enumerate(to_do):
        if job_key.IsCancelled():
            break
        try:
            x_of_y = HydrusData.ConvertValueRangeToPrettyString(index + 1, num_to_do)
            job_key.SetVariable('popup_text_1', 'Done {}'.format(x_of_y))
            job_key.SetVariable('popup_gauge_1', (index + 1, num_to_do))
            QP.CallAfter(qt_update_label, x_of_y)
            hash = media.GetHash()
            mime = media.GetMime()
            path = os.path.normpath(path)
            # Safety check: never write outside the chosen export directory.
            if not path.startswith(directory):
                raise Exception('It seems a destination path was above the main export directory! The file was "{}" and its destination path was "{}".'.format(hash.hex(), path))
            path_dir = os.path.dirname(path)
            HydrusPaths.MakeSureDirectoryExists(path_dir)
            if export_tag_txts:
                # Write the file's tags (from the selected services) to a sidecar .txt.
                tags_manager = media.GetTagsManager()
                tags = set()
                for service_key in neighbouring_txt_tag_service_keys:
                    current_tags = tags_manager.GetCurrent(service_key, ClientTags.TAG_DISPLAY_ACTUAL)
                    tags.update(current_tags)
                tags = sorted(tags)
                txt_path = path + '.txt'
                with open(txt_path, 'w', encoding='utf-8') as f:
                    f.write(os.linesep.join(tags))
            source_path = client_files_manager.GetFilePath(hash, mime, check_file_exists=False)
            if export_symlinks:
                os.symlink(source_path, path)
            else:
                HydrusPaths.MirrorFile(source_path, path)
                HydrusPaths.MakeFileWriteable(path)
        except:
            QP.CallAfter(QW.QMessageBox.information, self, 'Information', 'Encountered a problem while attempting to export file with index ' + str(ordering_index + 1) + ':' + os.linesep * 2 + traceback.format_exc())
            break
        pauser.Pause()
    if not job_key.IsCancelled() and delete_afterwards:
        QP.CallAfter(qt_update_label, 'deleting')
        # Respect the 'do not delete archived files' lock if it is on.
        delete_lock_for_archived_files = HG.client_controller.new_options.GetBoolean('delete_lock_for_archived_files')
        if delete_lock_for_archived_files:
            deletee_hashes = {media.GetHash() for (ordering_index, media, path) in to_do if not media.HasArchive()}
        else:
            deletee_hashes = {media.GetHash() for (ordering_index, media, path) in to_do}
        # Delete in chunks of 64 to keep each content update small.
        chunks_of_hashes = HydrusData.SplitListIntoChunks(deletee_hashes, 64)
        reason = 'Deleted after manual export to "{}".'.format(directory)
        content_updates = [HydrusData.ContentUpdate(HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_DELETE, chunk_of_hashes, reason=reason) for chunk_of_hashes in chunks_of_hashes]
        for content_update in content_updates:
            HG.client_controller.WriteSynchronous('content_updates', {CC.LOCAL_FILE_SERVICE_KEY: [content_update]})
    job_key.DeleteVariable('popup_gauge_1')
    job_key.SetVariable('popup_text_1', 'Done!')
    job_key.Finish()
    job_key.Delete(5)
    QP.CallAfter(qt_update_label, 'done!')
    time.sleep(1)
    QP.CallAfter(qt_update_label, 'export')
    QP.CallAfter(qt_done, quit_afterwards)