def PendURLs(self, urls, service_keys_to_tags=None):
    """Turn each usable URL into a file seed or a gallery seed and queue it.

    URLs of length 0 or 1 are dropped, and any URL for which the domain
    manager's GetURLClass raises URLClassException is skipped. A URL with no
    URL class, or whose class reports a file or post URL type, becomes a file
    seed; every other URL becomes a gallery seed that cannot generate more
    pages. The same service_keys_to_tags object is fixed onto every seed.

    New gallery seeds are added to the gallery seed log and new file seeds to
    the file seed cache; the matching repeating job is woken for each
    non-empty batch. All of this happens while holding self._lock.

    :param urls: iterable of URL strings to queue.
    :param service_keys_to_tags: tags to fix onto every seed; a fresh
        ClientTags.ServiceKeysToTags() is created when this is None.
    """
    if service_keys_to_tags is None:
        service_keys_to_tags = ClientTags.ServiceKeysToTags()

    with self._lock:
        # anything of length 0 or 1 is dropped; this takes out the occasional
        # whitespace entry
        usable_urls = [candidate for candidate in urls if len(candidate) > 1]

        new_file_seeds = []
        new_gallery_seeds = []

        for usable_url in usable_urls:
            try:
                matched_class = HG.client_controller.network_engine.domain_manager.GetURLClass(
                    usable_url)
            except HydrusExceptions.URLClassException:
                continue

            goes_to_file_queue = (
                matched_class is None
                or matched_class.GetURLType() in (HC.URL_TYPE_FILE, HC.URL_TYPE_POST)
            )

            if not goes_to_file_queue:
                seed = ClientImportGallerySeeds.GallerySeed(
                    usable_url, can_generate_more_pages=False)
                seed.SetFixedServiceKeysToTags(service_keys_to_tags)
                new_gallery_seeds.append(seed)
                continue

            seed = ClientImportFileSeeds.FileSeed(
                ClientImportFileSeeds.FILE_SEED_TYPE_URL, usable_url)
            seed.SetFixedServiceKeysToTags(service_keys_to_tags)
            new_file_seeds.append(seed)

        if new_gallery_seeds:
            self._gallery_seed_log.AddGallerySeeds(new_gallery_seeds)
            ClientImporting.WakeRepeatingJob(self._gallery_repeating_job)

        if new_file_seeds:
            self._file_seed_cache.AddFileSeeds(new_file_seeds)
            ClientImporting.WakeRepeatingJob(self._files_repeating_job)
def _ImportURLs( self, urls ):
    """
    Add the given URLs to the gallery seed log as new gallery seeds, asking the user how to proceed where needed.
    
    The URLs are deduplicated first. If some are already in the log, the user is asked whether to add only the new
    ones or all of them. If this panel allows it (self._can_generate_more_pages), the user is also asked whether the
    new seeds may generate subsequent gallery pages. Cancelling either dialog aborts without adding anything.
    
    :param urls: list of URL strings to add.
    """
    
    gallery_seed_log = self._gallery_seed_log_get_callable()
    
    urls = HydrusData.DedupeList( urls )
    
    filtered_urls = [ url for url in urls if not gallery_seed_log.HasGalleryURL( url ) ]
    
    # default: add everything. this is only narrowed if the user explicitly asks for new urls only
    urls_to_add = urls
    
    if len( filtered_urls ) < len( urls ):
        
        num_urls = len( urls )
        num_removed = num_urls - len( filtered_urls )
        
        message = 'Of the ' + HydrusData.ToHumanInt( num_urls ) + ' URLs you mean to add, ' + HydrusData.ToHumanInt( num_removed ) + ' are already in the search log. Would you like to only add new URLs or add everything (which will force a re-check of the duplicates)?'
        
        ( result, was_cancelled ) = ClientGUIDialogsQuick.GetYesNo( self, message, yes_label = 'only add new urls', no_label = 'add all urls, even duplicates', check_for_cancelled = True )
        
        if was_cancelled:
            
            return
            
        
        if result == QW.QDialog.Accepted:
            
            urls_to_add = filtered_urls
            
        
        # a Rejected result here is the 'add all urls, even duplicates' button, not a cancel (that is reported
        # through was_cancelled above), so we fall through with the full list rather than bailing out
        
    
    can_generate_more_pages = False
    
    if self._can_generate_more_pages:
        
        message = 'Would you like these urls to only check for new files, or would you like them to also generate subsequent gallery pages, like a regular search would?'
        
        ( result, was_cancelled ) = ClientGUIDialogsQuick.GetYesNo( self, message, yes_label = 'just check what I am adding', no_label = 'start a potential new search for every url added', check_for_cancelled = True )
        
        if was_cancelled:
            
            return
            
        
        # 'no' is the 'start a potential new search' button
        can_generate_more_pages = result == QW.QDialog.Rejected
        
    
    gallery_seeds = [ ClientImportGallerySeeds.GallerySeed( url, can_generate_more_pages = can_generate_more_pages ) for url in urls_to_add ]
    
    gallery_seed_log.AddGallerySeeds( gallery_seeds )
def _WorkOnGallery( self ):
    """
    Take the next pending ( url, formula ) job, fetch the page and queue the file URLs the formula parses out of it.
    
    A gallery seed is created for the url and added to the gallery seed log; its final status and note are always
    written back in the finally clause, whatever happened during the fetch or parse.
    
    :return: True if a job was taken off the queue and processed (successfully or not), False if nothing was
        pending, and None if the job was interrupted by a HydrusExceptions.ShutdownException.
    """
    
    # test and pop under a single lock acquisition, so the queue cannot be emptied between the check and the pop
    with self._lock:
        
        if len( self._pending_jobs ) == 0:
            
            self._gallery_status = ''
            
            return False
            
        
        ( url, simple_downloader_formula ) = self._pending_jobs.pop( 0 )
        
        self._gallery_status = 'checking ' + url
        
    
    error_occurred = False
    
    # pessimistic defaults, overwritten on success or by the specific exception handlers below
    gallery_seed_status = CC.STATUS_ERROR
    parser_status = 'job not completed'
    
    gallery_seed = ClientImportGallerySeeds.GallerySeed( url, can_generate_more_pages = False )
    
    try:
        
        self._gallery_seed_log.AddGallerySeeds( ( gallery_seed, ) )
        
        network_job = self._NetworkJobFactory( 'GET', url )
        
        network_job.OverrideBandwidth( 30 )
        
        HG.client_controller.network_engine.AddJob( network_job )
        
        with self._PageNetworkJobPresentationContextFactory( network_job ):
            
            network_job.WaitUntilDone()
            
        
        parsing_text = network_job.GetContentText()
        
        #
        
        parsing_context = { 'url' : url }
        
        parsing_formula = simple_downloader_formula.GetFormula()
        
        file_seeds = []
        
        for parsed_text in parsing_formula.Parse( parsing_context, parsing_text ):
            
            try:
                
                # parsed results may be relative, so resolve them against the page url
                file_url = urllib.parse.urljoin( url, parsed_text )
                
                file_seed = ClientImportFileSeeds.FileSeed( ClientImportFileSeeds.FILE_SEED_TYPE_URL, file_url )
                
                file_seed.SetReferralURL( url )
                
                file_seeds.append( file_seed )
                
            except Exception:
                
                # best effort: one bad parse result should not sink the whole page.
                # Exception rather than a bare except, so KeyboardInterrupt/SystemExit still propagate
                continue
                
            
        
        num_new = self._file_seed_cache.AddFileSeeds( file_seeds )
        
        if num_new > 0:
            
            ClientImporting.WakeRepeatingJob( self._files_repeating_job )
            
        
        parser_status = 'page checked OK with formula "' + simple_downloader_formula.GetName() + '" - ' + HydrusData.ToHumanInt( num_new ) + ' new urls'
        
        num_already_in_file_seed_cache = len( file_seeds ) - num_new
        
        if num_already_in_file_seed_cache > 0:
            
            parser_status += ' (' + HydrusData.ToHumanInt( num_already_in_file_seed_cache ) + ' already in queue)'
            
        
        gallery_seed_status = CC.STATUS_SUCCESSFUL_AND_NEW
        
    except HydrusExceptions.ShutdownException:
        
        gallery_seed_status = CC.STATUS_VETOED
        parser_status = 'program is shutting down'
        
        # the finally clause below still records the status before this returns
        return
        
    except HydrusExceptions.NotFoundException:
        
        gallery_seed_status = CC.STATUS_VETOED
        
        error_occurred = True
        
        parser_status = 'page 404'
        
    except HydrusExceptions.NetworkException as e:
        
        delay = HG.client_controller.new_options.GetInteger( 'downloader_network_error_delay' )
        
        self._DelayWork( delay, str( e ) )
        
        gallery_seed_status = CC.STATUS_ERROR
        error_occurred = True
        
        parser_status = str( e )
        
        HydrusData.PrintException( e )
        
    except Exception as e:
        
        gallery_seed_status = CC.STATUS_ERROR
        error_occurred = True
        
        parser_status = str( e )
        
    finally:
        
        gallery_seed.SetStatus( gallery_seed_status, note = parser_status )
        
        self._gallery_seed_log.NotifyGallerySeedsUpdated( ( gallery_seed, ) )
        
    
    with self._lock:
        
        self._gallery_status = ClientImportControl.NeatenStatusText( parser_status )
        
    
    if error_occurred:
        
        # pause after an error before reporting the job as done
        time.sleep( 5 )
        
    
    return True
def _CheckWatchableURL(self):
    """Run one check of self._url and update the watcher's status fields.

    A gallery seed (which cannot generate more pages) is created for the URL,
    added to the gallery seed log and worked on. New file URLs wake the files
    repeating job. A 404 result or an errored seed pauses checking, and a
    NetworkException delays further work via self._DelayWork. Afterwards the
    check bookkeeping (last check time, file velocity, next check time,
    compaction) is refreshed under the lock. If the seed ended as
    STATUS_SUCCESSFUL_AND_NEW, the status text is cleared after five seconds;
    any other outcome leaves it in place.
    """

    def first_line_of(text):
        # only the first line of whatever text is handed over is kept
        if len(text) > 0:
            return text.splitlines()[0]
        return text

    def file_seeds_callable(file_seeds):
        return ClientImporting.UpdateFileSeedCacheWithFileSeeds(
            self._file_seed_cache, file_seeds)

    def status_hook(text):
        with self._lock:
            self._watcher_status = first_line_of(text)

    def title_hook(text):
        with self._lock:
            self._subject = first_line_of(text)

    check_seed = ClientImportGallerySeeds.GallerySeed(
        self._url, can_generate_more_pages=False)
    check_seed.SetFixedServiceKeysToTags(self._fixed_service_keys_to_tags)

    self._gallery_seed_log.AddGallerySeeds((check_seed, ))

    with self._lock:
        self._watcher_status = 'checking'

    try:
        work_result = check_seed.WorkOnURL(
            'watcher',
            self._gallery_seed_log,
            file_seeds_callable,
            status_hook,
            title_hook,
            self._NetworkJobFactory,
            self._CheckerNetworkJobPresentationContextFactory,
            self._file_import_options)

        # only the added count and the 404 flag are used below
        (num_urls_added, _num_already_in_cache, _num_total, result_404,
         _added_new_gallery_pages, _stop_reason) = work_result

        if num_urls_added > 0:
            ClientImporting.WakeRepeatingJob(self._files_repeating_job)

        if result_404:
            with self._lock:
                self._checking_paused = True
                self._checking_status = ClientImporting.CHECKER_STATUS_404

        if check_seed.status == CC.STATUS_ERROR:
            # the [DEAD] stuff can override watcher status, so hold the error
            # on display for a brief time before carrying on
            with self._lock:
                self._checking_paused = True
                self._watcher_status = check_seed.note

            time.sleep(5)

    except HydrusExceptions.NetworkException as e:
        retry_delay = HG.client_controller.new_options.GetInteger(
            'downloader_network_error_delay')

        self._DelayWork(retry_delay, str(e))

        HydrusData.PrintException(e)

    final_note = check_seed.note
    keep_status_visible = check_seed.status != CC.STATUS_SUCCESSFUL_AND_NEW

    with self._lock:
        if self._check_now:
            self._check_now = False

        self._watcher_status = final_note
        self._last_check_time = HydrusData.GetNow()

        self._UpdateFileVelocityStatus()
        self._UpdateNextCheckTime()
        self._Compact()

    if keep_status_visible:
        return

    # a successful check's note is only shown briefly, then blanked
    time.sleep(5)

    with self._lock:
        self._watcher_status = ''