def __init__(self):
    """Initialise an importer with an empty job queue and default options.

    Every attribute is set to a blank/neutral value; the default 'loud' file
    import options are read from the client controller's options object.
    """
    HydrusSerialisable.SerialisableBase.__init__(self)

    default_file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions(
        'loud')

    # Work queue plus the logs of what has been fetched so far.
    self._pending_jobs = []
    self._gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog()
    self._file_seed_cache = ClientImportFileSeeds.FileSeedCache()

    # Options and user-facing selections.
    self._file_import_options = default_file_import_options
    self._formula_name = 'all files linked by images in page'
    self._queue_paused = False
    self._files_paused = False

    # Status text shown to the user.
    self._parser_status = ''
    self._current_action = ''

    self._lock = threading.Lock()

    # Network / repeating job handles; nothing is attached at construction.
    self._files_network_job = None
    self._page_network_job = None
    self._files_repeating_job = None
    self._queue_repeating_job = None

    HG.client_controller.sub(
        self, 'NotifyFileSeedsUpdated', 'file_seed_cache_file_seeds_updated')
def _UpdateSerialisableInfo(self, version, old_serialisable_info): if version == 1: (serialisable_file_seed_cache, serialisable_file_import_options, paused) = old_serialisable_info gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog() serialisable_gallery_seed_log = gallery_seed_log.GetSerialisableTuple( ) new_serialisable_info = (serialisable_gallery_seed_log, serialisable_file_seed_cache, serialisable_file_import_options, paused) return (2, new_serialisable_info) if version == 2: (serialisable_gallery_seed_log, serialisable_file_seed_cache, serialisable_file_import_options, paused) = old_serialisable_info tag_import_options = ClientImportOptions.TagImportOptions( is_default=True) serialisable_tag_import_options = tag_import_options.GetSerialisableTuple( ) new_serialisable_info = (serialisable_gallery_seed_log, serialisable_file_seed_cache, serialisable_file_import_options, serialisable_tag_import_options, paused) return (3, new_serialisable_info)
def __init__(self):
    """Initialise an importer with empty seed logs and default options.

    Subscribes the instance to both file-seed and gallery-seed update
    notifications on the client controller.
    """
    HydrusSerialisable.SerialisableBase.__init__(self)

    # Logs of gallery pages and files seen so far.
    self._gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog()
    self._file_seed_cache = ClientImportFileSeeds.FileSeedCache()

    # Import behaviour.
    new_options = HG.client_controller.new_options
    self._file_import_options = new_options.GetDefaultFileImportOptions(
        'loud')
    self._tag_import_options = ClientImportOptions.TagImportOptions(
        is_default=True)
    self._paused = False

    self._downloader_key = HydrusData.GenerateKey()

    self._lock = threading.Lock()

    # Network / repeating job handles; nothing is attached at construction.
    self._files_network_job = None
    self._gallery_network_job = None
    self._files_repeating_job = None
    self._gallery_repeating_job = None

    controller = HG.client_controller
    controller.sub(self, 'NotifyFileSeedsUpdated',
                   'file_seed_cache_file_seeds_updated')
    controller.sub(self, 'NotifyGallerySeedsUpdated',
                   'gallery_seed_log_gallery_seeds_updated')
def __init__(self):
    """Initialise a watcher with no URL, empty logs and default options.

    Checker and file import defaults come from the client controller's
    options object; all status strings start empty and both pause flags
    start False.
    """
    HydrusSerialisable.SerialisableBase.__init__(self)

    # Page association; the key is a placeholder until a real one is set.
    self._page_key = 'initialising page key'
    self._publish_to_page = False

    # What is being watched and what has been found.
    self._url = ''
    self._gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog()
    self._file_seed_cache = ClientImportFileSeeds.FileSeedCache()
    self._urls_to_filenames = {}
    self._urls_to_md5_base64 = {}

    # Options.
    self._checker_options = HG.client_controller.new_options.GetDefaultWatcherCheckerOptions(
    )
    self._file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions(
        'loud')
    self._tag_import_options = ClientImportOptions.TagImportOptions(
        is_default=True)

    # Checking state.
    self._last_check_time = 0
    self._checking_status = ClientImporting.CHECKER_STATUS_OK
    self._subject = 'unknown subject'
    self._next_check_time = None

    self._download_control_file_set = None
    self._download_control_file_clear = None
    self._download_control_checker_set = None
    self._download_control_checker_clear = None

    self._check_now = False
    self._files_paused = False
    self._checking_paused = False

    # Back-off state.
    self._no_work_until = 0
    self._no_work_until_reason = ''

    self._creation_time = HydrusData.GetNow()

    # Status text shown to the user.
    self._file_velocity_status = ''
    self._current_action = ''
    self._watcher_status = ''

    self._watcher_key = HydrusData.GenerateKey()

    self._lock = threading.Lock()

    self._last_pubbed_page_name = ''

    # Repeating job handles; nothing is attached at construction.
    self._files_repeating_job = None
    self._checker_repeating_job = None

    HG.client_controller.sub(
        self, 'NotifyFileSeedsUpdated', 'file_seed_cache_file_seeds_updated')
def _CheckWatchableURL(self):
    """Run one check of the watched URL and hand the outcome to _FinishCheck.

    A fresh gallery seed is made for ``self._url``, logged, and worked on.
    Network errors delay further work for four hours. The seed's note and
    status are then passed to ``self._FinishCheck``.
    """

    def status_hook(text):
        # Callback used by the seed to publish progress text.
        with self._lock:
            self._watcher_status = text

    def title_hook(text):
        # Callback used by the seed to publish the parsed subject.
        with self._lock:
            self._subject = text

    seed = ClientImportGallerySeeds.GallerySeed(
        self._url, can_generate_more_pages=False)

    self._gallery_seed_log.AddGallerySeeds((seed, ))

    try:
        network_job_factory = ClientImporting.GenerateWatcherNetworkJobFactory(
            self._watcher_key)

        (num_urls_added, added_all_possible_urls,
         result_404) = seed.WorkOnURL(
             self._gallery_seed_log, self._file_seed_cache, status_hook,
             title_hook, network_job_factory,
             self._CheckerNetworkJobPresentationContextFactory,
             self._file_import_options)

        if num_urls_added > 0:
            # New file urls were queued, so nudge the file worker.
            ClientImporting.WakeRepeatingJob(self._files_repeating_job)

        if result_404:
            with self._lock:
                self._checking_status = ClientImporting.CHECKER_STATUS_404

    except HydrusExceptions.NetworkException as e:
        four_hours = 4 * 3600

        self._DelayWork(four_hours, HydrusData.ToUnicode(e))

        HydrusData.PrintException(e)

    note = seed.note
    had_error = seed.status == CC.STATUS_ERROR
    # Only a "successful and new" result lets the status text be cleared.
    note_should_stick = seed.status != CC.STATUS_SUCCESSFUL_AND_NEW

    self._FinishCheck(note, had_error, note_should_stick)
def PendURLs(self, urls):
    """Queue the given URLs as file seeds or gallery seeds.

    URLs of length one or less are dropped. A URL with no URL match, or
    whose match is a file or post URL, becomes a file seed; anything else
    becomes a gallery seed that cannot generate more pages. The matching
    repeating job is woken for each non-empty batch. All work happens under
    ``self._lock``.
    """
    with self._lock:
        # > _1_ to take out the occasional whitespace
        usable_urls = [u for u in urls if len(u) > 1]

        new_file_seeds = []
        new_gallery_seeds = []

        for url in usable_urls:
            url_match = HG.client_controller.network_engine.domain_manager.GetURLMatch(
                url)

            treat_as_file = url_match is None or url_match.GetURLType() in (
                HC.URL_TYPE_FILE, HC.URL_TYPE_POST)

            if treat_as_file:
                new_file_seeds.append(
                    ClientImportFileSeeds.FileSeed(
                        ClientImportFileSeeds.FILE_SEED_TYPE_URL, url))
            else:
                new_gallery_seeds.append(
                    ClientImportGallerySeeds.GallerySeed(
                        url, can_generate_more_pages=False))

        if len(new_gallery_seeds) > 0:
            self._gallery_seed_log.AddGallerySeeds(new_gallery_seeds)

            ClientImporting.WakeRepeatingJob(self._gallery_repeating_job)

        if len(new_file_seeds) > 0:
            self._file_seed_cache.AddFileSeeds(new_file_seeds)

            ClientImporting.WakeRepeatingJob(self._files_repeating_job)
def _ImportURLs(self, urls): gallery_seed_log = self._gallery_seed_log_get_callable() filtered_urls = [ url for url in urls if not gallery_seed_log.HasGalleryURL(url) ] urls_to_add = urls if len(filtered_urls) < urls: num_urls = len(urls) num_removed = num_urls - len(filtered_urls) message = 'Of the ' + HydrusData.ToHumanInt( num_urls ) + ' URLs you mean to add, ' + HydrusData.ToHumanInt( num_removed ) + ' are already in the gallery log. Would you like to only add new URLs or add everything (which will force a re-check of the duplicates)?' with ClientGUIDialogs.DialogYesNo( self, message, yes_label='only add new urls', no_label='add all urls, even duplicates') as dlg: result = dlg.ShowModal() if result == wx.ID_YES: urls_to_add = filtered_urls elif result == wx.ID_CANCEL: return can_generate_more_pages = False if self._can_generate_more_pages: message = 'Would you like these urls to only check for new files, or would you like them to also generate subsequent gallery pages, like a regular search would?' with ClientGUIDialogs.DialogYesNo( self, message, yes_label='just check what I am adding', no_label='start a potential new search for every url added' ) as dlg: result = dlg.ShowModal() if result == wx.ID_CANCEL: return can_generate_more_pages = result == wx.ID_NO gallery_seeds = [ ClientImportGallerySeeds.GallerySeed( url, can_generate_more_pages=can_generate_more_pages) for url in urls_to_add ] gallery_seed_log.AddGallerySeeds(gallery_seeds)
def _UpdateSerialisableInfo(self, version, old_serialisable_info): if version == 1: (url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_file_import_options, serialisable_tag_import_options, times_to_check, check_period, last_check_time, paused) = old_serialisable_info checker_options = ClientImportOptions.CheckerOptions( intended_files_per_check=8, never_faster_than=300, never_slower_than=86400, death_file_velocity=(1, 86400)) serialisable_checker_options = checker_options.GetSerialisableTuple( ) files_paused = paused checking_paused = paused new_serialisable_info = (url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused) return (2, new_serialisable_info) if version == 2: (url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused) = old_serialisable_info checking_status = ClientImporting.CHECKER_STATUS_OK subject = 'unknown subject' new_serialisable_info = (url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject) return (3, new_serialisable_info) if version == 3: (url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject) = old_serialisable_info no_work_until = 0 no_work_until_reason = '' new_serialisable_info = (url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, 
serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason) return (4, new_serialisable_info) if version == 4: (url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason) = old_serialisable_info creation_time = HydrusData.GetNow() new_serialisable_info = (url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time) return (5, new_serialisable_info) if version == 5: (url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time) = old_serialisable_info gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog() serialisable_gallery_seed_log = gallery_seed_log.GetSerialisableTuple( ) new_serialisable_info = (url, serialisable_gallery_seed_log, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time) return (6, new_serialisable_info)
def _CheckWatchableURL(self):
    """Run one check of the watched URL and update the watcher's state.

    A fresh gallery seed is made for ``self._url``, logged, and worked on.
    A 404 or an errored seed pauses checking. Network errors delay further
    work by the 'downloader_network_error_delay' option. Afterwards the
    check time, file velocity and next check time are refreshed, and the
    status text is cleared after five seconds unless it should stick.
    """

    def file_seeds_callable(file_seeds):
        # Hands parsed file seeds to this watcher's file seed cache.
        return ClientImporting.UpdateFileSeedCacheWithFileSeeds(
            self._file_seed_cache, file_seeds)

    def status_hook(text):
        with self._lock:
            self._watcher_status = text

    def title_hook(text):
        with self._lock:
            self._subject = text

    seed = ClientImportGallerySeeds.GallerySeed(
        self._url, can_generate_more_pages=False)

    self._gallery_seed_log.AddGallerySeeds((seed, ))

    with self._lock:
        self._watcher_status = 'checking'

    try:
        (num_urls_added, num_urls_already_in_file_seed_cache,
         num_urls_total, result_404, added_new_gallery_pages,
         stop_reason) = seed.WorkOnURL(
             'watcher', self._gallery_seed_log, file_seeds_callable,
             status_hook, title_hook, self._NetworkJobFactory,
             self._CheckerNetworkJobPresentationContextFactory,
             self._file_import_options)

        if num_urls_added > 0:
            ClientImporting.WakeRepeatingJob(self._files_repeating_job)

        if result_404:
            with self._lock:
                self._checking_paused = True
                self._checking_status = ClientImporting.CHECKER_STATUS_404

        if seed.status == CC.STATUS_ERROR:
            # the [DEAD] stuff can override watcher status, so let's give a brief time for this to display the error
            with self._lock:
                self._checking_paused = True
                self._watcher_status = seed.note

            time.sleep(5)

    except HydrusExceptions.NetworkException as e:
        retry_delay = HG.client_controller.new_options.GetInteger(
            'downloader_network_error_delay')

        self._DelayWork(retry_delay, HydrusData.ToUnicode(e))

        HydrusData.PrintException(e)

    final_status = seed.note
    # Only a "successful and new" result lets the status text be cleared.
    status_should_stick = seed.status != CC.STATUS_SUCCESSFUL_AND_NEW

    with self._lock:
        if self._check_now:
            self._check_now = False

        self._watcher_status = final_status
        self._last_check_time = HydrusData.GetNow()

        self._UpdateFileVelocityStatus()
        self._UpdateNextCheckTime()
        self._Compact()

    if status_should_stick:
        return

    # Leave the result visible briefly, then blank it.
    time.sleep(5)

    with self._lock:
        self._watcher_status = ''
def _WorkOnQueue(self, page_key): if len(self._pending_jobs) > 0: with self._lock: (url, simple_downloader_formula) = self._pending_jobs.pop(0) self._parser_status = 'checking ' + url error_occurred = False try: gallery_seed = ClientImportGallerySeeds.GallerySeed( url, can_generate_more_pages=False) self._gallery_seed_log.AddGallerySeeds((gallery_seed, )) network_job = ClientNetworkingJobs.NetworkJobDownloader( page_key, 'GET', url) network_job.OverrideBandwidth(30) HG.client_controller.network_engine.AddJob(network_job) with self._PageNetworkJobPresentationContextFactory( network_job): network_job.WaitUntilDone() data = network_job.GetContent() # parsing_context = {} parsing_context['url'] = url parsing_formula = simple_downloader_formula.GetFormula() file_seeds = [] for parsed_text in parsing_formula.Parse( parsing_context, data): try: file_url = urlparse.urljoin(url, parsed_text) file_seed = ClientImportFileSeeds.FileSeed( ClientImportFileSeeds.FILE_SEED_TYPE_URL, file_url) file_seed.SetReferralURL(url) file_seeds.append(file_seed) except: continue num_new = self._file_seed_cache.AddFileSeeds(file_seeds) if num_new > 0: ClientImporting.WakeRepeatingJob(self._files_repeating_job) parser_status = 'page checked OK with formula "' + simple_downloader_formula.GetName( ) + '" - ' + HydrusData.ToHumanInt(num_new) + ' new urls' num_already_in_file_seed_cache = len(file_seeds) - num_new if num_already_in_file_seed_cache > 0: parser_status += ' (' + HydrusData.ToHumanInt( num_already_in_file_seed_cache) + ' already in queue)' gallery_seed_status = CC.STATUS_SUCCESSFUL_AND_NEW except HydrusExceptions.ShutdownException: gallery_seed_status = CC.STATUS_VETOED parser_status = 'program is shutting down' return except HydrusExceptions.NotFoundException: gallery_seed_status = CC.STATUS_VETOED error_occurred = True parser_status = 'page 404' except Exception as e: gallery_seed_status = CC.STATUS_ERROR error_occurred = True parser_status = HydrusData.ToUnicode(e) finally: 
gallery_seed_note = parser_status gallery_seed.SetStatus(gallery_seed_status, note=gallery_seed_note) self._gallery_seed_log.NotifyGallerySeedsUpdated( (gallery_seed, )) with self._lock: self._parser_status = parser_status if error_occurred: time.sleep(5) return True else: with self._lock: self._parser_status = '' return False
def _UpdateSerialisableInfo(self, version, old_serialisable_info): if version == 1: (pending_page_urls, serialisable_file_seed_cache, serialisable_file_import_options, download_image_links, download_unlinked_images, paused) = old_serialisable_info queue_paused = paused files_paused = paused new_serialisable_info = (pending_page_urls, serialisable_file_seed_cache, serialisable_file_import_options, download_image_links, download_unlinked_images, queue_paused, files_paused) return (2, new_serialisable_info) if version == 2: (pending_page_urls, serialisable_file_seed_cache, serialisable_file_import_options, download_image_links, download_unlinked_images, queue_paused, files_paused) = old_serialisable_info pending_jobs = [] new_serialisable_info = (pending_jobs, serialisable_file_seed_cache, serialisable_file_import_options, queue_paused, files_paused) return (3, new_serialisable_info) if version == 3: (pending_jobs, serialisable_file_seed_cache, serialisable_file_import_options, queue_paused, files_paused) = old_serialisable_info pending_jobs = [] formula_name = 'all files linked by images in page' new_serialisable_info = (pending_jobs, serialisable_file_seed_cache, serialisable_file_import_options, formula_name, queue_paused, files_paused) return (4, new_serialisable_info) if version == 4: (pending_jobs, serialisable_file_seed_cache, serialisable_file_import_options, formula_name, queue_paused, files_paused) = old_serialisable_info gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog() serialisable_gallery_seed_log = gallery_seed_log.GetSerialisableTuple( ) new_serialisable_info = (pending_jobs, serialisable_gallery_seed_log, serialisable_file_seed_cache, serialisable_file_import_options, formula_name, queue_paused, files_paused) return (5, new_serialisable_info)
def _UpdateSerialisableInfo(self, version, old_serialisable_info): if version == 1: (serialisable_gallery_identifier, serialisable_gallery_stream_identifiers, serialisable_current_query_stuff, pending_queries, get_tags_if_url_recognised_and_file_redundant, file_limit, gallery_paused, files_paused, serialisable_file_import_options, serialisable_tag_import_options, serialisable_file_seed_cache) = old_serialisable_info new_serialisable_info = (serialisable_gallery_identifier, serialisable_gallery_stream_identifiers, serialisable_current_query_stuff, pending_queries, file_limit, gallery_paused, files_paused, serialisable_file_import_options, serialisable_tag_import_options, serialisable_file_seed_cache) return (2, new_serialisable_info) if version == 2: (serialisable_gallery_identifier, serialisable_gallery_stream_identifiers, serialisable_current_query_stuff, pending_queries, file_limit, gallery_paused, files_paused, serialisable_file_import_options, serialisable_tag_import_options, serialisable_file_seed_cache) = old_serialisable_info gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog() serialisable_gallery_seed_log = gallery_seed_log.GetSerialisableTuple( ) new_serialisable_info = (serialisable_gallery_identifier, serialisable_gallery_stream_identifiers, serialisable_current_query_stuff, pending_queries, file_limit, gallery_paused, files_paused, serialisable_file_import_options, serialisable_tag_import_options, serialisable_gallery_seed_log, serialisable_file_seed_cache) return (3, new_serialisable_info) if version == 3: (serialisable_gallery_identifier, serialisable_gallery_stream_identifiers, serialisable_current_query_stuff, pending_queries, file_limit, gallery_paused, files_paused, serialisable_file_import_options, serialisable_tag_import_options, serialisable_gallery_seed_log, serialisable_file_seed_cache) = old_serialisable_info (current_query, current_query_num_new_urls, serialisable_current_gallery_stream_identifier, 
current_gallery_stream_identifier_page_index, serialisable_current_gallery_stream_identifier_found_urls, serialisable_pending_gallery_stream_identifiers ) = serialisable_current_query_stuff highlighted_gallery_import_key = None serialisable_highlighted_gallery_import_key = highlighted_gallery_import_key gallery_imports = HydrusSerialisable.SerialisableList() file_import_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_import_options) tag_import_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_tag_import_options) gallery_seed_log = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_gallery_seed_log) file_seed_cache = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_seed_cache) if len(file_seed_cache) > 0: current_query = 'queue brought from old page' gallery_import = GalleryImport( query=current_query, source_name='updated from old system', initial_search_urls=[]) gallery_import.PausePlayGallery() gallery_import.PausePlayFiles() gallery_import.SetFileLimit(file_limit) gallery_import.SetFileImportOptions(file_import_options) gallery_import.SetTagImportOptions(tag_import_options) gallery_import.SetFileSeedCache(file_seed_cache) gallery_import.SetGallerySeedLog(gallery_seed_log) gallery_imports.append(gallery_import) serialisable_gallery_imports = gallery_imports.GetSerialisableTuple( ) new_serialisable_info = ( serialisable_gallery_identifier, serialisable_highlighted_gallery_import_key, file_limit, serialisable_file_import_options, serialisable_tag_import_options, serialisable_gallery_imports) return (4, new_serialisable_info) if version == 4: (serialisable_gallery_identifier, serialisable_highlighted_gallery_import_key, file_limit, serialisable_file_import_options, serialisable_tag_import_options, serialisable_gallery_imports) = old_serialisable_info gallery_identifier = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_gallery_identifier) (gug_key, gug_name ) = 
ClientDownloading.ConvertGalleryIdentifierToGUGKeyAndName( gallery_identifier) serialisable_gug_key_and_name = ( HydrusData.GenerateKey().encode('hex'), gug_name) new_serialisable_info = ( serialisable_gug_key_and_name, serialisable_highlighted_gallery_import_key, file_limit, serialisable_file_import_options, serialisable_tag_import_options, serialisable_gallery_imports) return (5, new_serialisable_info) if version == 5: (serialisable_gug_key_and_name, serialisable_highlighted_gallery_import_key, file_limit, serialisable_file_import_options, serialisable_tag_import_options, serialisable_gallery_imports) = old_serialisable_info start_file_queues_paused = False start_gallery_queues_paused = False new_serialisable_info = ( serialisable_gug_key_and_name, serialisable_highlighted_gallery_import_key, file_limit, start_file_queues_paused, start_gallery_queues_paused, serialisable_file_import_options, serialisable_tag_import_options, serialisable_gallery_imports) return (6, new_serialisable_info)
def __init__(self,
             query=None,
             source_name=None,
             initial_search_urls=None,
             start_file_queue_paused=False,
             start_gallery_queue_paused=False):
    """Initialise a gallery import for one query.

    :param query: query text; defaults to 'samus_aran' when None.
    :param source_name: name of the source; defaults to 'unknown' when None.
    :param initial_search_urls: URLs turned into the first gallery seeds;
        defaults to an empty list when None.
    :param start_file_queue_paused: initial value of the files-paused flag.
    :param start_gallery_queue_paused: initial value of the gallery-paused
        flag.
    """
    query = 'samus_aran' if query is None else query
    source_name = 'unknown' if source_name is None else source_name
    initial_search_urls = [] if initial_search_urls is None else initial_search_urls

    HydrusSerialisable.SerialisableBase.__init__(self)

    # Identity.
    self._creation_time = HydrusData.GetNow()
    self._gallery_import_key = HydrusData.GenerateKey()

    self._query = query
    self._source_name = source_name

    # Page association; the key is a placeholder until a real one is set.
    self._page_key = 'initialising page key'
    self._publish_to_page = False

    # Search progress counters.
    self._current_page_index = 0
    self._num_new_urls_found = 0
    self._num_urls_found = 0

    self._file_limit = HC.options['gallery_file_limit']

    self._files_paused = start_file_queue_paused
    self._gallery_paused = start_gallery_queue_paused

    self._file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions(
        'loud')
    self._tag_import_options = ClientImportOptions.TagImportOptions(
        is_default=True)

    # Seed the gallery log with one gallery seed per initial search url.
    self._gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog()

    initial_gallery_seeds = [
        ClientImportGallerySeeds.GallerySeed(search_url)
        for search_url in initial_search_urls
    ]

    self._gallery_seed_log.AddGallerySeeds(initial_gallery_seeds)

    self._file_seed_cache = ClientImportFileSeeds.FileSeedCache()

    # Back-off state.
    self._no_work_until = 0
    self._no_work_until_reason = ''

    self._lock = threading.Lock()

    # Status text shown to the user.
    self._gallery_status = ''
    self._gallery_status_can_change_timestamp = 0
    self._current_action = ''

    # Network / repeating job handles; nothing is attached at construction.
    self._file_network_job = None
    self._gallery_network_job = None
    self._files_repeating_job = None
    self._gallery_repeating_job = None

    controller = HG.client_controller
    controller.sub(self, 'NotifyFileSeedsUpdated',
                   'file_seed_cache_file_seeds_updated')
    controller.sub(self, 'NotifyGallerySeedsUpdated',
                   'gallery_seed_log_gallery_seeds_updated')