def GetServiceKeysToContentUpdates( self, status: int, media_result: ClientMediaResult.MediaResult, filterable_tags: typing.Iterable[str], external_filterable_tags=None, external_additional_service_keys_to_tags=None): if external_filterable_tags is None: external_filterable_tags = set() if external_additional_service_keys_to_tags is None: external_additional_service_keys_to_tags = ClientTags.ServiceKeysToTags( ) filterable_tags = HydrusTags.CleanTags(filterable_tags) service_keys_to_tags = ClientTags.ServiceKeysToTags() for service_key in HG.client_controller.services_manager.GetServiceKeys( HC.REAL_TAG_SERVICES): service_additional_tags = set() if service_key in external_additional_service_keys_to_tags: service_additional_tags.update( external_additional_service_keys_to_tags[service_key]) if service_key in self._service_keys_to_service_tag_import_options: service_tag_import_options = self._service_keys_to_service_tag_import_options[ service_key] service_filterable_tags = set(filterable_tags) service_filterable_tags.update(external_filterable_tags) service_tags = service_tag_import_options.GetTags( service_key, status, media_result, service_filterable_tags, service_additional_tags) else: service_tags = service_additional_tags if len(service_tags) > 0: service_keys_to_tags[service_key] = service_tags hash = media_result.GetHash() service_keys_to_content_updates = ClientData.ConvertServiceKeysToTagsToServiceKeysToContentUpdates( {hash}, service_keys_to_tags) return service_keys_to_content_updates
def _UpdateSerialisableInfo( self, version, old_serialisable_info ): if version == 1: ( url, can_generate_more_pages, created, modified, status, note, referral_url ) = old_serialisable_info external_additional_service_keys_to_tags = ClientTags.ServiceKeysToTags() serialisable_external_additional_service_keys_to_tags = external_additional_service_keys_to_tags.GetSerialisableTuple() new_serialisable_info = ( url, can_generate_more_pages, serialisable_external_additional_service_keys_to_tags, created, modified, status, note, referral_url ) return ( 2, new_serialisable_info ) if version == 2: ( url, can_generate_more_pages, serialisable_external_additional_service_keys_to_tags, created, modified, status, note, referral_url ) = old_serialisable_info external_filterable_tags = set() serialisable_external_filterable_tags = list( external_filterable_tags ) new_serialisable_info = ( url, can_generate_more_pages, serialisable_external_filterable_tags, serialisable_external_additional_service_keys_to_tags, created, modified, status, note, referral_url ) return ( 3, new_serialisable_info )
def __init__(self, url=None, can_generate_more_pages=True): if url is None: url = 'https://nostrils-central.cx/index.php?post=s&tag=hyper_nostrils&page=3' else: try: url = HG.client_controller.network_engine.domain_manager.NormaliseURL( url) except HydrusExceptions.URLClassException: pass HydrusSerialisable.SerialisableBase.__init__(self) self.url = url self._can_generate_more_pages = can_generate_more_pages self._fixed_service_keys_to_tags = ClientTags.ServiceKeysToTags() self.created = HydrusData.GetNow() self.modified = self.created self.status = CC.STATUS_UNKNOWN self.note = '' self._referral_url = None self._force_next_page_url_generation = False self._run_token = HydrusData.GenerateKey()
def PendURLs(self, urls, service_keys_to_tags=None): if service_keys_to_tags is None: service_keys_to_tags = ClientTags.ServiceKeysToTags() with self._lock: urls = [u for u in urls if len(u) > 1 ] # > _1_ to take out the occasional whitespace file_seeds = [] gallery_seeds = [] for url in urls: try: url_class = HG.client_controller.network_engine.domain_manager.GetURLClass( url) except HydrusExceptions.URLClassException: continue if url_class is None or url_class.GetURLType() in ( HC.URL_TYPE_FILE, HC.URL_TYPE_POST): file_seed = ClientImportFileSeeds.FileSeed( ClientImportFileSeeds.FILE_SEED_TYPE_URL, url) file_seed.SetFixedServiceKeysToTags(service_keys_to_tags) file_seeds.append(file_seed) else: can_generate_more_pages = False gallery_seed = ClientImportGallerySeeds.GallerySeed( url, can_generate_more_pages=can_generate_more_pages) gallery_seed.SetFixedServiceKeysToTags( service_keys_to_tags) gallery_seeds.append(gallery_seed) if len(gallery_seeds) > 0: self._gallery_seed_log.AddGallerySeeds(gallery_seeds) ClientImporting.WakeRepeatingJob(self._gallery_repeating_job) if len(file_seeds) > 0: self._file_seed_cache.AddFileSeeds(file_seeds) ClientImporting.WakeRepeatingJob(self._files_repeating_job)
def __init__( self ): HydrusSerialisable.SerialisableBase.__init__( self ) self._page_key = 'initialising page key' self._publish_to_page = False self._url = '' self._gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog() self._file_seed_cache = ClientImportFileSeeds.FileSeedCache() self._fixed_service_keys_to_tags = ClientTags.ServiceKeysToTags() self._checker_options = HG.client_controller.new_options.GetDefaultWatcherCheckerOptions() self._file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions( 'loud' ) self._tag_import_options = ClientImportOptions.TagImportOptions( is_default = True ) self._last_check_time = 0 self._checking_status = ClientImporting.CHECKER_STATUS_OK self._subject = 'unknown subject' self._next_check_time = None self._file_network_job = None self._checker_network_job = None self._check_now = False self._files_paused = False self._checking_paused = False self._no_work_until = 0 self._no_work_until_reason = '' self._creation_time = HydrusData.GetNow() self._file_velocity_status = '' self._file_status = '' self._watcher_status = '' self._watcher_key = HydrusData.GenerateKey() self._lock = threading.Lock() self._last_pubbed_page_name = '' self._files_repeating_job = None self._checker_repeating_job = None HG.client_controller.sub( self, 'NotifyFileSeedsUpdated', 'file_seed_cache_file_seeds_updated' )
def _UpdateSerialisableInfo(self, version, old_serialisable_info): if version == 1: (url, can_generate_more_pages, created, modified, status, note, referral_url) = old_serialisable_info fixed_service_keys_to_tags = ClientTags.ServiceKeysToTags() serialisable_fixed_service_keys_to_tags = fixed_service_keys_to_tags.GetSerialisableTuple( ) new_serialisable_info = (url, can_generate_more_pages, serialisable_fixed_service_keys_to_tags, created, modified, status, note, referral_url) return (2, new_serialisable_info)
def _ImportFiles(self, job_key): did_work = False time_to_save = HydrusData.GetNow() + 600 num_files_imported = 0 presentation_hashes = [] presentation_hashes_fast = set() i = 0 # don't want to start at 23/100 because of carrying over failed results or whatever # num_to_do is num currently unknown num_total = self._file_seed_cache.GetFileSeedCount(CC.STATUS_UNKNOWN) while True: file_seed = self._file_seed_cache.GetNextFileSeed( CC.STATUS_UNKNOWN) p1 = HG.client_controller.new_options.GetBoolean( 'pause_import_folders_sync') or self._paused p2 = HydrusThreading.IsThreadShuttingDown() p3 = job_key.IsCancelled() if file_seed is None or p1 or p2 or p3: break did_work = True if HydrusData.TimeHasPassed(time_to_save): HG.client_controller.WriteSynchronous('serialisable', self) time_to_save = HydrusData.GetNow() + 600 gauge_num_done = num_files_imported + 1 job_key.SetVariable( 'popup_text_1', 'importing file ' + HydrusData.ConvertValueRangeToPrettyString( gauge_num_done, num_total)) job_key.SetVariable('popup_gauge_1', (gauge_num_done, num_total)) path = file_seed.file_seed_data file_seed.ImportPath(self._file_seed_cache, self._file_import_options, limited_mimes=self._mimes) if file_seed.status in CC.SUCCESSFUL_IMPORT_STATES: hash = None if file_seed.HasHash(): hash = file_seed.GetHash() if self._tag_import_options.HasAdditionalTags(): media_result = HG.client_controller.Read( 'media_result', hash) downloaded_tags = [] service_keys_to_content_updates = self._tag_import_options.GetServiceKeysToContentUpdates( file_seed.status, media_result, downloaded_tags) # additional tags if len(service_keys_to_content_updates) > 0: HG.client_controller.WriteSynchronous( 'content_updates', service_keys_to_content_updates) service_keys_to_tags = ClientTags.ServiceKeysToTags() for (tag_service_key, filename_tagging_options) in list( self._tag_service_keys_to_filename_tagging_options. items()): if not HG.client_controller.services_manager.ServiceExists( tag_service_key): continue try: tags = filename_tagging_options.GetTags( tag_service_key, path) if len(tags) > 0: service_keys_to_tags[tag_service_key] = tags except Exception as e: HydrusData.ShowText( 'Trying to parse filename tags in the import folder "' + self._name + '" threw an error!') HydrusData.ShowException(e) if len(service_keys_to_tags) > 0: service_keys_to_content_updates = ClientData.ConvertServiceKeysToTagsToServiceKeysToContentUpdates( {hash}, service_keys_to_tags) HG.client_controller.WriteSynchronous( 'content_updates', service_keys_to_content_updates) num_files_imported += 1 if hash not in presentation_hashes_fast: if file_seed.ShouldPresent(self._file_import_options. GetPresentationImportOptions()): presentation_hashes.append(hash) presentation_hashes_fast.add(hash) elif file_seed.status == CC.STATUS_ERROR: HydrusData.Print( 'A file failed to import from import folder ' + self._name + ':' + path) i += 1 if i % 10 == 0: self._ActionPaths() if num_files_imported > 0: HydrusData.Print('Import folder ' + self._name + ' imported ' + HydrusData.ToHumanInt(num_files_imported) + ' files.') if len(presentation_hashes) > 0: ClientImporting.PublishPresentationHashes( self._name, presentation_hashes, self._publish_files_to_popup_button, self._publish_files_to_page) self._ActionPaths() return did_work
def SetExternalAdditionalServiceKeysToTags( self, service_keys_to_tags ): self._external_additional_service_keys_to_tags = ClientTags.ServiceKeysToTags( service_keys_to_tags )
def test_dict_to_content_updates(self): hash = HydrusData.GenerateKey() hashes = {hash} local_key = CC.DEFAULT_LOCAL_TAG_SERVICE_KEY remote_key = HG.test_controller.example_tag_repo_service_key service_keys_to_tags = ClientTags.ServiceKeysToTags({local_key: {'a'}}) content_updates = { local_key: [ HydrusData.ContentUpdate(HC.CONTENT_TYPE_MAPPINGS, HC.CONTENT_UPDATE_ADD, ('a', hashes)) ] } self.assertEqual( ClientData.ConvertServiceKeysToTagsToServiceKeysToContentUpdates( {hash}, service_keys_to_tags), content_updates) service_keys_to_tags = ClientTags.ServiceKeysToTags( {remote_key: {'c'}}) content_updates = { remote_key: [ HydrusData.ContentUpdate(HC.CONTENT_TYPE_MAPPINGS, HC.CONTENT_UPDATE_PEND, ('c', hashes)) ] } self.assertEqual( ClientData.ConvertServiceKeysToTagsToServiceKeysToContentUpdates( {hash}, service_keys_to_tags), content_updates) service_keys_to_tags = ClientTags.ServiceKeysToTags({ local_key: ['a', 'character:b'], remote_key: ['c', 'series:d'] }) content_updates = {} content_updates[local_key] = [ HydrusData.ContentUpdate(HC.CONTENT_TYPE_MAPPINGS, HC.CONTENT_UPDATE_ADD, ('a', hashes)), HydrusData.ContentUpdate(HC.CONTENT_TYPE_MAPPINGS, HC.CONTENT_UPDATE_ADD, ('character:b', hashes)) ] content_updates[remote_key] = [ HydrusData.ContentUpdate(HC.CONTENT_TYPE_MAPPINGS, HC.CONTENT_UPDATE_PEND, ('c', hashes)), HydrusData.ContentUpdate(HC.CONTENT_TYPE_MAPPINGS, HC.CONTENT_UPDATE_PEND, ('series:d', hashes)) ] self.assertEqual( HydrusData.ContentUpdate(HC.CONTENT_TYPE_MAPPINGS, HC.CONTENT_UPDATE_PEND, 'c'), HydrusData.ContentUpdate(HC.CONTENT_TYPE_MAPPINGS, HC.CONTENT_UPDATE_PEND, 'c')) self.assertEqual( ClientData.ConvertServiceKeysToTagsToServiceKeysToContentUpdates( {hash}, service_keys_to_tags), content_updates)
def SetFixedServiceKeysToTags(self, service_keys_to_tags): self._fixed_service_keys_to_tags = ClientTags.ServiceKeysToTags( service_keys_to_tags)
def _UpdateSerialisableInfo( self, version, old_serialisable_info ): if version == 1: ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_file_import_options, serialisable_tag_import_options, times_to_check, check_period, last_check_time, paused ) = old_serialisable_info checker_options = ClientImportOptions.CheckerOptions( intended_files_per_check = 8, never_faster_than = 300, never_slower_than = 86400, death_file_velocity = ( 1, 86400 ) ) serialisable_checker_options = checker_options.GetSerialisableTuple() files_paused = paused checking_paused = paused new_serialisable_info = ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused ) return ( 2, new_serialisable_info ) if version == 2: ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused ) = old_serialisable_info checking_status = ClientImporting.CHECKER_STATUS_OK subject = 'unknown subject' new_serialisable_info = ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject ) return ( 3, new_serialisable_info ) if version == 3: ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject ) = old_serialisable_info no_work_until = 0 no_work_until_reason = '' new_serialisable_info = ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason ) return ( 4, new_serialisable_info ) if version == 4: ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason ) = old_serialisable_info creation_time = HydrusData.GetNow() new_serialisable_info = ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time ) return ( 5, new_serialisable_info ) if version == 5: ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time ) = old_serialisable_info gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog() serialisable_gallery_seed_log = gallery_seed_log.GetSerialisableTuple() new_serialisable_info = ( url, serialisable_gallery_seed_log, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time ) return ( 6, new_serialisable_info ) if version == 6: ( url, serialisable_gallery_seed_log, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time ) = old_serialisable_info fixed_service_keys_to_tags = ClientTags.ServiceKeysToTags() serialisable_fixed_service_keys_to_tags = fixed_service_keys_to_tags.GetSerialisableTuple() new_serialisable_info = ( url, serialisable_gallery_seed_log, serialisable_file_seed_cache, serialisable_fixed_service_keys_to_tags, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time ) return ( 7, new_serialisable_info )