def _UpdateSerialisableInfo( self, version, old_serialisable_info ): if version == 1: ( query, check_now, last_check_time, next_check_time, paused, status, serialisable_file_seed_cache ) = old_serialisable_info gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog() serialisable_gallery_seed_log = gallery_seed_log.GetSerialisableTuple() new_serialisable_info = ( query, check_now, last_check_time, next_check_time, paused, status, serialisable_gallery_seed_log, serialisable_file_seed_cache ) return ( 2, new_serialisable_info ) if version == 2: ( query, check_now, last_check_time, next_check_time, paused, status, serialisable_gallery_seed_log, serialisable_file_seed_cache ) = old_serialisable_info display_name = None tag_import_options = ClientImportOptions.TagImportOptions() serialisable_tag_import_options = tag_import_options.GetSerialisableTuple() new_serialisable_info = ( query, display_name, check_now, last_check_time, next_check_time, paused, status, serialisable_gallery_seed_log, serialisable_file_seed_cache, serialisable_tag_import_options ) return ( 3, new_serialisable_info )
def _UpdateSerialisableInfo( self, version, old_serialisable_info ): if version == 1: ( serialisable_file_seed_cache, serialisable_file_import_options, paused ) = old_serialisable_info gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog() serialisable_gallery_seed_log = gallery_seed_log.GetSerialisableTuple() new_serialisable_info = ( serialisable_gallery_seed_log, serialisable_file_seed_cache, serialisable_file_import_options, paused ) return ( 2, new_serialisable_info ) if version == 2: ( serialisable_gallery_seed_log, serialisable_file_seed_cache, serialisable_file_import_options, paused ) = old_serialisable_info tag_import_options = TagImportOptions.TagImportOptions( is_default = True ) serialisable_tag_import_options = tag_import_options.GetSerialisableTuple() new_serialisable_info = ( serialisable_gallery_seed_log, serialisable_file_seed_cache, serialisable_file_import_options, serialisable_tag_import_options, paused ) return ( 3, new_serialisable_info )
def __init__(self): HydrusSerialisable.SerialisableBase.__init__(self) file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions( 'loud') self._pending_jobs = [] self._gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog() self._file_seed_cache = ClientImportFileSeeds.FileSeedCache() self._file_import_options = file_import_options self._formula_name = 'all files linked by images in page' self._queue_paused = False self._files_paused = False self._downloader_key = HydrusData.GenerateKey() self._parser_status = '' self._current_action = '' self._lock = threading.Lock() self._have_started = False self._files_network_job = None self._page_network_job = None self._files_repeating_job = None self._queue_repeating_job = None self._last_serialisable_change_timestamp = 0 HG.client_controller.sub(self, 'NotifyFileSeedsUpdated', 'file_seed_cache_file_seeds_updated')
def __init__( self ): HydrusSerialisable.SerialisableBase.__init__( self ) self._gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog() self._file_seed_cache = ClientImportFileSeeds.FileSeedCache() self._file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions( 'loud' ) self._tag_import_options = TagImportOptions.TagImportOptions( is_default = True ) self._paused = False self._no_work_until = 0 self._no_work_until_reason = '' self._page_key = b'initialising page key' self._downloader_key = HydrusData.GenerateKey() self._lock = threading.Lock() self._have_started = False self._files_status = '' self._gallery_status = '' self._files_network_job = None self._gallery_network_job = None self._files_repeating_job = None self._gallery_repeating_job = None self._last_serialisable_change_timestamp = 0 HG.client_controller.sub( self, 'NotifyFileSeedsUpdated', 'file_seed_cache_file_seeds_updated' ) HG.client_controller.sub( self, 'NotifyGallerySeedsUpdated', 'gallery_seed_log_gallery_seeds_updated' )
def __init__(self): HydrusSerialisable.SerialisableBase.__init__(self) self._gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog() self._file_seed_cache = ClientImportFileSeeds.FileSeedCache() self._file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions( 'loud') self._tag_import_options = ClientImportOptions.TagImportOptions( is_default=True) self._paused = False self._downloader_key = HydrusData.GenerateKey() self._lock = threading.Lock() self._files_network_job = None self._gallery_network_job = None self._files_repeating_job = None self._gallery_repeating_job = None HG.client_controller.sub(self, 'NotifyFileSeedsUpdated', 'file_seed_cache_file_seeds_updated') HG.client_controller.sub(self, 'NotifyGallerySeedsUpdated', 'gallery_seed_log_gallery_seeds_updated')
def PendURLs(self, urls, service_keys_to_tags=None): if service_keys_to_tags is None: service_keys_to_tags = ClientTags.ServiceKeysToTags() with self._lock: urls = [u for u in urls if len(u) > 1 ] # > _1_ to take out the occasional whitespace file_seeds = [] gallery_seeds = [] for url in urls: try: url_class = HG.client_controller.network_engine.domain_manager.GetURLClass( url) except HydrusExceptions.URLClassException: continue if url_class is None or url_class.GetURLType() in ( HC.URL_TYPE_FILE, HC.URL_TYPE_POST): file_seed = ClientImportFileSeeds.FileSeed( ClientImportFileSeeds.FILE_SEED_TYPE_URL, url) file_seed.SetFixedServiceKeysToTags(service_keys_to_tags) file_seeds.append(file_seed) else: can_generate_more_pages = False gallery_seed = ClientImportGallerySeeds.GallerySeed( url, can_generate_more_pages=can_generate_more_pages) gallery_seed.SetFixedServiceKeysToTags( service_keys_to_tags) gallery_seeds.append(gallery_seed) if len(gallery_seeds) > 0: self._gallery_seed_log.AddGallerySeeds(gallery_seeds) ClientImporting.WakeRepeatingJob(self._gallery_repeating_job) if len(file_seeds) > 0: self._file_seed_cache.AddFileSeeds(file_seeds) ClientImporting.WakeRepeatingJob(self._files_repeating_job)
def _ImportURLs( self, urls ): gallery_seed_log = self._gallery_seed_log_get_callable() urls = HydrusData.DedupeList( urls ) filtered_urls = [ url for url in urls if not gallery_seed_log.HasGalleryURL( url ) ] urls_to_add = urls if len( filtered_urls ) < len( urls ): num_urls = len( urls ) num_removed = num_urls - len( filtered_urls ) message = 'Of the ' + HydrusData.ToHumanInt( num_urls ) + ' URLs you mean to add, ' + HydrusData.ToHumanInt( num_removed ) + ' are already in the search log. Would you like to only add new URLs or add everything (which will force a re-check of the duplicates)?' ( result, was_cancelled ) = ClientGUIDialogsQuick.GetYesNo( self, message, yes_label = 'only add new urls', no_label = 'add all urls, even duplicates', check_for_cancelled = True ) if was_cancelled: return if result == QW.QDialog.Accepted: urls_to_add = filtered_urls elif result == QW.QDialog.Rejected: return can_generate_more_pages = False if self._can_generate_more_pages: message = 'Would you like these urls to only check for new files, or would you like them to also generate subsequent gallery pages, like a regular search would?' ( result, was_cancelled ) = ClientGUIDialogsQuick.GetYesNo( self, message, yes_label = 'just check what I am adding', no_label = 'start a potential new search for every url added', check_for_cancelled = True ) if was_cancelled: return can_generate_more_pages = result == QW.QDialog.Rejected gallery_seeds = [ ClientImportGallerySeeds.GallerySeed( url, can_generate_more_pages = can_generate_more_pages ) for url in urls_to_add ] gallery_seed_log.AddGallerySeeds( gallery_seeds )
def __init__(self): HydrusSerialisable.SerialisableBase.__init__(self) self._page_key = 'initialising page key' self._publish_to_page = False self._url = '' self._gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog() self._file_seed_cache = ClientImportFileSeeds.FileSeedCache() self._fixed_service_keys_to_tags = ClientTags.ServiceKeysToTags() self._checker_options = HG.client_controller.new_options.GetDefaultWatcherCheckerOptions( ) self._file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions( 'loud') self._tag_import_options = ClientImportOptions.TagImportOptions( is_default=True) self._last_check_time = 0 self._checking_status = ClientImporting.CHECKER_STATUS_OK self._subject = 'unknown subject' self._next_check_time = None self._file_network_job = None self._checker_network_job = None self._check_now = False self._files_paused = False self._checking_paused = False self._no_work_until = 0 self._no_work_until_reason = '' self._creation_time = HydrusData.GetNow() self._file_velocity_status = '' self._file_status = '' self._watcher_status = '' self._watcher_key = HydrusData.GenerateKey() self._lock = threading.Lock() self._last_pubbed_page_name = '' self._files_repeating_job = None self._checker_repeating_job = None HG.client_controller.sub(self, 'NotifyFileSeedsUpdated', 'file_seed_cache_file_seeds_updated')
def __init__( self, query = 'query text' ): HydrusSerialisable.SerialisableBase.__init__( self ) self._query = query self._display_name = None self._check_now = False self._last_check_time = 0 self._next_check_time = 0 self._paused = False self._status = ClientImporting.CHECKER_STATUS_OK self._gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog() self._file_seed_cache = ClientImportFileSeeds.FileSeedCache() self._tag_import_options = ClientImportOptions.TagImportOptions()
def _UpdateSerialisableInfo( self, version, old_serialisable_info ): if version == 1: ( pending_page_urls, serialisable_file_seed_cache, serialisable_file_import_options, download_image_links, download_unlinked_images, paused ) = old_serialisable_info queue_paused = paused files_paused = paused new_serialisable_info = ( pending_page_urls, serialisable_file_seed_cache, serialisable_file_import_options, download_image_links, download_unlinked_images, queue_paused, files_paused ) return ( 2, new_serialisable_info ) if version == 2: ( pending_page_urls, serialisable_file_seed_cache, serialisable_file_import_options, download_image_links, download_unlinked_images, queue_paused, files_paused ) = old_serialisable_info pending_jobs = [] new_serialisable_info = ( pending_jobs, serialisable_file_seed_cache, serialisable_file_import_options, queue_paused, files_paused ) return ( 3, new_serialisable_info ) if version == 3: ( pending_jobs, serialisable_file_seed_cache, serialisable_file_import_options, queue_paused, files_paused ) = old_serialisable_info pending_jobs = [] formula_name = 'all files linked by images in page' new_serialisable_info = ( pending_jobs, serialisable_file_seed_cache, serialisable_file_import_options, formula_name, queue_paused, files_paused ) return ( 4, new_serialisable_info ) if version == 4: ( pending_jobs, serialisable_file_seed_cache, serialisable_file_import_options, formula_name, queue_paused, files_paused ) = old_serialisable_info gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog() serialisable_gallery_seed_log = gallery_seed_log.GetSerialisableTuple() new_serialisable_info = ( pending_jobs, serialisable_gallery_seed_log, serialisable_file_seed_cache, serialisable_file_import_options, formula_name, queue_paused, files_paused ) return ( 5, new_serialisable_info )
def __init__(self, name): HydrusSerialisable.SerialisableBaseNamed.__init__(self, name) self._gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog() self._file_seed_cache = ClientImportFileSeeds.FileSeedCache()
def _WorkOnGallery( self ): if len( self._pending_jobs ) > 0: with self._lock: ( url, simple_downloader_formula ) = self._pending_jobs.pop( 0 ) self._gallery_status = 'checking ' + url error_occurred = False gallery_seed_status = CC.STATUS_ERROR parser_status = 'job not completed' gallery_seed = ClientImportGallerySeeds.GallerySeed( url, can_generate_more_pages = False ) try: self._gallery_seed_log.AddGallerySeeds( ( gallery_seed, ) ) network_job = self._NetworkJobFactory( 'GET', url ) network_job.OverrideBandwidth( 30 ) HG.client_controller.network_engine.AddJob( network_job ) with self._PageNetworkJobPresentationContextFactory( network_job ): network_job.WaitUntilDone() parsing_text = network_job.GetContentText() # parsing_context = {} parsing_context[ 'url' ] = url parsing_formula = simple_downloader_formula.GetFormula() file_seeds = [] for parsed_text in parsing_formula.Parse( parsing_context, parsing_text ): try: file_url = urllib.parse.urljoin( url, parsed_text ) file_seed = ClientImportFileSeeds.FileSeed( ClientImportFileSeeds.FILE_SEED_TYPE_URL, file_url ) file_seed.SetReferralURL( url ) file_seeds.append( file_seed ) except: continue num_new = self._file_seed_cache.AddFileSeeds( file_seeds ) if num_new > 0: ClientImporting.WakeRepeatingJob( self._files_repeating_job ) parser_status = 'page checked OK with formula "' + simple_downloader_formula.GetName() + '" - ' + HydrusData.ToHumanInt( num_new ) + ' new urls' num_already_in_file_seed_cache = len( file_seeds ) - num_new if num_already_in_file_seed_cache > 0: parser_status += ' (' + HydrusData.ToHumanInt( num_already_in_file_seed_cache ) + ' already in queue)' gallery_seed_status = CC.STATUS_SUCCESSFUL_AND_NEW except HydrusExceptions.ShutdownException: gallery_seed_status = CC.STATUS_VETOED parser_status = 'program is shutting down' return except HydrusExceptions.NotFoundException: gallery_seed_status = CC.STATUS_VETOED error_occurred = True parser_status = 'page 404' except HydrusExceptions.NetworkException as e: delay = HG.client_controller.new_options.GetInteger( 'downloader_network_error_delay' ) self._DelayWork( delay, str( e ) ) gallery_seed_status = CC.STATUS_ERROR error_occurred = True parser_status = str( e ) HydrusData.PrintException( e ) except Exception as e: gallery_seed_status = CC.STATUS_ERROR error_occurred = True parser_status = str( e ) finally: gallery_seed_note = parser_status gallery_seed.SetStatus( gallery_seed_status, note = gallery_seed_note ) self._gallery_seed_log.NotifyGallerySeedsUpdated( ( gallery_seed, ) ) with self._lock: self._gallery_status = ClientImportControl.NeatenStatusText( parser_status ) if error_occurred: time.sleep( 5 ) return True else: with self._lock: self._gallery_status = '' return False
def _UpdateSerialisableInfo(self, version, old_serialisable_info): if version == 1: (url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_file_import_options, serialisable_tag_import_options, times_to_check, check_period, last_check_time, paused) = old_serialisable_info checker_options = ClientImportOptions.CheckerOptions( intended_files_per_check=8, never_faster_than=300, never_slower_than=86400, death_file_velocity=(1, 86400)) serialisable_checker_options = checker_options.GetSerialisableTuple( ) files_paused = paused checking_paused = paused new_serialisable_info = (url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused) return (2, new_serialisable_info) if version == 2: (url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused) = old_serialisable_info checking_status = ClientImporting.CHECKER_STATUS_OK subject = 'unknown subject' new_serialisable_info = (url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject) return (3, new_serialisable_info) if version == 3: (url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject) = old_serialisable_info no_work_until = 0 no_work_until_reason = '' new_serialisable_info = (url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason) return (4, new_serialisable_info) if version == 4: (url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason) = old_serialisable_info creation_time = HydrusData.GetNow() new_serialisable_info = (url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time) return (5, new_serialisable_info) if version == 5: (url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time) = old_serialisable_info gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog() serialisable_gallery_seed_log = gallery_seed_log.GetSerialisableTuple( ) new_serialisable_info = (url, serialisable_gallery_seed_log, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time) return (6, new_serialisable_info) if version == 6: (url, serialisable_gallery_seed_log, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time) = old_serialisable_info fixed_service_keys_to_tags = ClientTags.ServiceKeysToTags() serialisable_fixed_service_keys_to_tags = fixed_service_keys_to_tags.GetSerialisableTuple( ) new_serialisable_info = (url, serialisable_gallery_seed_log, serialisable_file_seed_cache, serialisable_fixed_service_keys_to_tags, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time) return (7, new_serialisable_info)
def _CheckWatchableURL(self): def file_seeds_callable(file_seeds): return ClientImporting.UpdateFileSeedCacheWithFileSeeds( self._file_seed_cache, file_seeds) def status_hook(text): with self._lock: if len(text) > 0: text = text.splitlines()[0] self._watcher_status = text def title_hook(text): with self._lock: if len(text) > 0: text = text.splitlines()[0] self._subject = text gallery_seed = ClientImportGallerySeeds.GallerySeed( self._url, can_generate_more_pages=False) gallery_seed.SetFixedServiceKeysToTags( self._fixed_service_keys_to_tags) self._gallery_seed_log.AddGallerySeeds((gallery_seed, )) with self._lock: self._watcher_status = 'checking' try: (num_urls_added, num_urls_already_in_file_seed_cache, num_urls_total, result_404, added_new_gallery_pages, stop_reason) = gallery_seed.WorkOnURL( 'watcher', self._gallery_seed_log, file_seeds_callable, status_hook, title_hook, self._NetworkJobFactory, self._CheckerNetworkJobPresentationContextFactory, self._file_import_options) if num_urls_added > 0: ClientImporting.WakeRepeatingJob(self._files_repeating_job) if result_404: with self._lock: self._checking_paused = True self._checking_status = ClientImporting.CHECKER_STATUS_404 if gallery_seed.status == CC.STATUS_ERROR: # the [DEAD] stuff can override watcher status, so let's give a brief time for this to display the error with self._lock: self._checking_paused = True self._watcher_status = gallery_seed.note time.sleep(5) except HydrusExceptions.NetworkException as e: delay = HG.client_controller.new_options.GetInteger( 'downloader_network_error_delay') self._DelayWork(delay, str(e)) HydrusData.PrintException(e) watcher_status = gallery_seed.note watcher_status_should_stick = gallery_seed.status != CC.STATUS_SUCCESSFUL_AND_NEW with self._lock: if self._check_now: self._check_now = False self._watcher_status = watcher_status self._last_check_time = HydrusData.GetNow() self._UpdateFileVelocityStatus() self._UpdateNextCheckTime() self._Compact() if not watcher_status_should_stick: time.sleep(5) with self._lock: self._watcher_status = ''