def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
     
     if version == 1:
         
         ( query, check_now, last_check_time, next_check_time, paused, status, serialisable_file_seed_cache ) = old_serialisable_info
         
         gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog()
         
         serialisable_gallery_seed_log = gallery_seed_log.GetSerialisableTuple()
         
         new_serialisable_info = ( query, check_now, last_check_time, next_check_time, paused, status, serialisable_gallery_seed_log, serialisable_file_seed_cache )
         
         return ( 2, new_serialisable_info )
         
     
     if version == 2:
         
         ( query, check_now, last_check_time, next_check_time, paused, status, serialisable_gallery_seed_log, serialisable_file_seed_cache ) = old_serialisable_info
         
         display_name = None
         tag_import_options = ClientImportOptions.TagImportOptions()
         
         serialisable_tag_import_options = tag_import_options.GetSerialisableTuple()
         
         new_serialisable_info = ( query, display_name, check_now, last_check_time, next_check_time, paused, status, serialisable_gallery_seed_log, serialisable_file_seed_cache, serialisable_tag_import_options )
         
         return ( 3, new_serialisable_info )
Example #2
0
 def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
     
     if version == 1:
         
         ( serialisable_file_seed_cache, serialisable_file_import_options, paused ) = old_serialisable_info
         
         gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog()
         
         serialisable_gallery_seed_log = gallery_seed_log.GetSerialisableTuple()
         
         new_serialisable_info = ( serialisable_gallery_seed_log, serialisable_file_seed_cache, serialisable_file_import_options, paused )
         
         return ( 2, new_serialisable_info )
         
     
     if version == 2:
         
         ( serialisable_gallery_seed_log, serialisable_file_seed_cache, serialisable_file_import_options, paused ) = old_serialisable_info
         
         tag_import_options = TagImportOptions.TagImportOptions( is_default = True )
         
         serialisable_tag_import_options = tag_import_options.GetSerialisableTuple()
         
         new_serialisable_info = ( serialisable_gallery_seed_log, serialisable_file_seed_cache, serialisable_file_import_options, serialisable_tag_import_options, paused )
         
         return ( 3, new_serialisable_info )
Example #3
0
    def __init__(self):
        """Build the importer in its initial, not-yet-started state.

        The file import options are the client's 'loud' defaults; the job
        list, gallery seed log and file seed cache are freshly constructed.
        Finishes by registering 'NotifyFileSeedsUpdated' for the
        'file_seed_cache_file_seeds_updated' topic through
        HG.client_controller.sub.
        """

        HydrusSerialisable.SerialisableBase.__init__(self)

        # import configuration and queues
        self._file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions('loud')
        self._pending_jobs = []
        self._gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog()
        self._file_seed_cache = ClientImportFileSeeds.FileSeedCache()
        self._formula_name = 'all files linked by images in page'
        self._queue_paused = self._files_paused = False

        self._downloader_key = HydrusData.GenerateKey()

        # status strings start blank
        self._parser_status = self._current_action = ''

        self._lock = threading.Lock()

        self._have_started = False

        # no network jobs or repeating jobs exist yet
        self._files_network_job = self._page_network_job = None
        self._files_repeating_job = self._queue_repeating_job = None

        self._last_serialisable_change_timestamp = 0

        HG.client_controller.sub(
            self,
            'NotifyFileSeedsUpdated',
            'file_seed_cache_file_seeds_updated',
        )
Example #4
0
 def __init__( self ):
     """Build the importer in its initial, not-yet-started state.
     
     The gallery seed log and file seed cache are freshly constructed, file import options are the
     client's 'loud' defaults, and tag import options are built with is_default = True. Finishes by
     registering the file-seed and gallery-seed update notifications through HG.client_controller.sub.
     """
     
     HydrusSerialisable.SerialisableBase.__init__( self )
     
     # queues and options
     self._gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog()
     self._file_seed_cache = ClientImportFileSeeds.FileSeedCache()
     self._file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions( 'loud' )
     self._tag_import_options = TagImportOptions.TagImportOptions( is_default = True )
     self._paused = False
     
     # no work delay is in force to begin with
     self._no_work_until = 0
     self._no_work_until_reason = ''
     
     # placeholder page key (bytes); the downloader key is generated fresh
     self._page_key = b'initialising page key'
     self._downloader_key = HydrusData.GenerateKey()
     
     self._lock = threading.Lock()
     
     self._have_started = False
     
     # status strings start blank
     self._files_status = self._gallery_status = ''
     
     # no network jobs or repeating jobs exist yet
     self._files_network_job = self._gallery_network_job = None
     self._files_repeating_job = self._gallery_repeating_job = None
     
     self._last_serialisable_change_timestamp = 0
     
     controller = HG.client_controller
     
     controller.sub( self, 'NotifyFileSeedsUpdated', 'file_seed_cache_file_seeds_updated' )
     controller.sub( self, 'NotifyGallerySeedsUpdated', 'gallery_seed_log_gallery_seeds_updated' )
Example #5
0
    def __init__(self):
        """Build the importer with fresh queues, default options and no jobs.

        File import options are the client's 'loud' defaults and tag import
        options are built with is_default=True. Finishes by registering the
        file-seed and gallery-seed update notifications through
        HG.client_controller.sub.
        """

        HydrusSerialisable.SerialisableBase.__init__(self)

        # queues and options
        self._gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog()
        self._file_seed_cache = ClientImportFileSeeds.FileSeedCache()
        self._file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions('loud')
        self._tag_import_options = ClientImportOptions.TagImportOptions(is_default=True)
        self._paused = False

        self._downloader_key = HydrusData.GenerateKey()

        self._lock = threading.Lock()

        # no network jobs or repeating jobs exist yet
        self._files_network_job = self._gallery_network_job = None
        self._files_repeating_job = self._gallery_repeating_job = None

        controller = HG.client_controller

        controller.sub(self, 'NotifyFileSeedsUpdated', 'file_seed_cache_file_seeds_updated')
        controller.sub(self, 'NotifyGallerySeedsUpdated', 'gallery_seed_log_gallery_seeds_updated')
Example #6
0
    def PendURLs(self, urls, service_keys_to_tags=None):
        """Queue urls for import, sorting each into the file or gallery queue.

        Urls of length 0 or 1 are discarded, and urls for which the domain
        manager raises URLClassException are skipped. A url with no url class,
        or whose class is a file or post type, becomes a file seed; anything
        else becomes a gallery seed that cannot generate more pages. Whichever
        queue received seeds has its repeating job woken.

        :param urls: the urls to queue
        :param service_keys_to_tags: tags fixed onto every seed created here;
            a new ClientTags.ServiceKeysToTags() is used when None
        :return: None
        """

        if service_keys_to_tags is None:

            service_keys_to_tags = ClientTags.ServiceKeysToTags()

        with self._lock:

            # requiring more than one character takes out the occasional whitespace entry
            usable_urls = [u for u in urls if len(u) > 1]

            new_file_seeds = []
            new_gallery_seeds = []

            for url in usable_urls:

                try:

                    url_class = HG.client_controller.network_engine.domain_manager.GetURLClass(url)

                except HydrusExceptions.URLClassException:

                    continue

                goes_to_gallery = url_class is not None and url_class.GetURLType() not in (HC.URL_TYPE_FILE, HC.URL_TYPE_POST)

                if goes_to_gallery:

                    seed = ClientImportGallerySeeds.GallerySeed(url, can_generate_more_pages=False)

                    seed.SetFixedServiceKeysToTags(service_keys_to_tags)

                    new_gallery_seeds.append(seed)

                else:

                    seed = ClientImportFileSeeds.FileSeed(ClientImportFileSeeds.FILE_SEED_TYPE_URL, url)

                    seed.SetFixedServiceKeysToTags(service_keys_to_tags)

                    new_file_seeds.append(seed)

            if new_gallery_seeds:

                self._gallery_seed_log.AddGallerySeeds(new_gallery_seeds)

                ClientImporting.WakeRepeatingJob(self._gallery_repeating_job)

            if new_file_seeds:

                self._file_seed_cache.AddFileSeeds(new_file_seeds)

                ClientImporting.WakeRepeatingJob(self._files_repeating_job)
Example #7
0
 def _ImportURLs( self, urls ):
     """Add urls to the gallery seed log as new gallery seeds, asking the user how to handle special cases.
     
     The urls are deduplicated first. If any are already in the log, a dialog offers 'only add new urls'
     or 'add all urls, even duplicates'. If self._can_generate_more_pages is set, a second dialog asks
     whether the new seeds may generate subsequent gallery pages. Cancelling either dialog aborts
     without adding anything.
     
     :param urls: the urls the user wants to add
     :return: None
     """
     
     gallery_seed_log = self._gallery_seed_log_get_callable()
     
     urls = HydrusData.DedupeList( urls )
     
     filtered_urls = [ url for url in urls if not gallery_seed_log.HasGalleryURL( url ) ]
     
     # default is to add everything; only narrowed if the user asks for new urls only
     urls_to_add = urls
     
     if len( filtered_urls ) < len( urls ):
         
         num_urls = len( urls )
         num_removed = num_urls - len( filtered_urls )
         
         message = 'Of the ' + HydrusData.ToHumanInt( num_urls ) + ' URLs you mean to add, ' + HydrusData.ToHumanInt( num_removed ) + ' are already in the search log. Would you like to only add new URLs or add everything (which will force a re-check of the duplicates)?'
         
         ( result, was_cancelled ) = ClientGUIDialogsQuick.GetYesNo( self, message, yes_label = 'only add new urls', no_label = 'add all urls, even duplicates', check_for_cancelled = True )
         
         if was_cancelled:
             
             return
             
         
         # cancellation was handled above, so any other non-accepted result is the
         # 'add all urls, even duplicates' button: keep the full list rather than bailing out
         if result == QW.QDialog.Accepted:
             
             urls_to_add = filtered_urls
             
         
     
     can_generate_more_pages = False
     
     if self._can_generate_more_pages:
         
         message = 'Would you like these urls to only check for new files, or would you like them to also generate subsequent gallery pages, like a regular search would?'
         
         ( result, was_cancelled ) = ClientGUIDialogsQuick.GetYesNo( self, message, yes_label = 'just check what I am adding', no_label = 'start a potential new search for every url added', check_for_cancelled = True )
         
         if was_cancelled:
             
             return
             
         
         # the 'no' button is the one that asks for new searches
         can_generate_more_pages = result == QW.QDialog.Rejected
         
     
     gallery_seeds = [ ClientImportGallerySeeds.GallerySeed( url, can_generate_more_pages = can_generate_more_pages ) for url in urls_to_add ]
     
     gallery_seed_log.AddGallerySeeds( gallery_seeds )
Example #8
0
    def __init__(self):
        """Build a watcher with a blank url, fresh queues and default options.

        Checker and file import options come from the client's defaults
        (file options use the 'loud' set); tag import options are built with
        is_default=True. Finishes by registering 'NotifyFileSeedsUpdated' for
        the 'file_seed_cache_file_seeds_updated' topic through
        HG.client_controller.sub.
        """

        HydrusSerialisable.SerialisableBase.__init__(self)

        # page placement and target url
        self._page_key = 'initialising page key'
        self._publish_to_page = False
        self._url = ''

        # queues and the tags fixed onto seeds
        self._gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog()
        self._file_seed_cache = ClientImportFileSeeds.FileSeedCache()
        self._fixed_service_keys_to_tags = ClientTags.ServiceKeysToTags()

        # options
        self._checker_options = HG.client_controller.new_options.GetDefaultWatcherCheckerOptions()
        self._file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions('loud')
        self._tag_import_options = ClientImportOptions.TagImportOptions(is_default=True)

        # checking state
        self._last_check_time = 0
        self._checking_status = ClientImporting.CHECKER_STATUS_OK
        self._subject = 'unknown subject'
        self._next_check_time = None

        # no network jobs exist yet
        self._file_network_job = self._checker_network_job = None

        self._check_now = self._files_paused = self._checking_paused = False

        # no work delay is in force to begin with
        self._no_work_until = 0
        self._no_work_until_reason = ''

        self._creation_time = HydrusData.GetNow()

        # status strings start blank
        self._file_velocity_status = self._file_status = self._watcher_status = ''

        self._watcher_key = HydrusData.GenerateKey()

        self._lock = threading.Lock()

        self._last_pubbed_page_name = ''

        # no repeating jobs exist yet
        self._files_repeating_job = self._checker_repeating_job = None

        HG.client_controller.sub(
            self,
            'NotifyFileSeedsUpdated',
            'file_seed_cache_file_seeds_updated',
        )
 def __init__( self, query = 'query text' ):
     """Create a query record that has never been checked.
     
     :param query: the query text to store; defaults to 'query text'
     """
     
     HydrusSerialisable.SerialisableBase.__init__( self )
     
     self._query = query
     self._display_name = None
     
     # scheduling: not flagged for an immediate check, not paused, both check times zeroed
     self._check_now = self._paused = False
     self._last_check_time = self._next_check_time = 0
     
     self._status = ClientImporting.CHECKER_STATUS_OK
     
     # freshly constructed containers and default-constructed tag import options
     self._gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog()
     self._file_seed_cache = ClientImportFileSeeds.FileSeedCache()
     self._tag_import_options = ClientImportOptions.TagImportOptions()
Example #10
0
 def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
     
     if version == 1:
         
         ( pending_page_urls, serialisable_file_seed_cache, serialisable_file_import_options, download_image_links, download_unlinked_images, paused ) = old_serialisable_info
         
         queue_paused = paused
         files_paused = paused
         
         new_serialisable_info = ( pending_page_urls, serialisable_file_seed_cache, serialisable_file_import_options, download_image_links, download_unlinked_images, queue_paused, files_paused )
         
         return ( 2, new_serialisable_info )
         
     
     if version == 2:
         
         ( pending_page_urls, serialisable_file_seed_cache, serialisable_file_import_options, download_image_links, download_unlinked_images, queue_paused, files_paused ) = old_serialisable_info
         
         pending_jobs = []
         
         new_serialisable_info = ( pending_jobs, serialisable_file_seed_cache, serialisable_file_import_options, queue_paused, files_paused )
         
         return ( 3, new_serialisable_info )
         
     
     if version == 3:
         
         ( pending_jobs, serialisable_file_seed_cache, serialisable_file_import_options, queue_paused, files_paused ) = old_serialisable_info
         
         pending_jobs = []
         
         formula_name = 'all files linked by images in page'
         
         new_serialisable_info = ( pending_jobs, serialisable_file_seed_cache, serialisable_file_import_options, formula_name, queue_paused, files_paused )
         
         return ( 4, new_serialisable_info )
         
     
     if version == 4:
         
         ( pending_jobs, serialisable_file_seed_cache, serialisable_file_import_options, formula_name, queue_paused, files_paused ) = old_serialisable_info
         
         gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog()
         
         serialisable_gallery_seed_log = gallery_seed_log.GetSerialisableTuple()
         
         new_serialisable_info = ( pending_jobs, serialisable_gallery_seed_log, serialisable_file_seed_cache, serialisable_file_import_options, formula_name, queue_paused, files_paused )
         
         return ( 5, new_serialisable_info )
    def __init__(self, name):
        """Initialise the named serialisable base and give the object its two containers.

        :param name: passed straight to SerialisableBaseNamed.__init__
        """

        HydrusSerialisable.SerialisableBaseNamed.__init__(self, name)

        # both containers are freshly constructed
        self._gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog()
        self._file_seed_cache = ClientImportFileSeeds.FileSeedCache()
Example #12
0
 def _WorkOnGallery( self ):
     
     if len( self._pending_jobs ) > 0:
         
         with self._lock:
             
             ( url, simple_downloader_formula ) = self._pending_jobs.pop( 0 )
             
             self._gallery_status = 'checking ' + url
             
         
         error_occurred = False
         
         gallery_seed_status = CC.STATUS_ERROR
         parser_status = 'job not completed'
         
         gallery_seed = ClientImportGallerySeeds.GallerySeed( url, can_generate_more_pages = False )
         
         try:
             
             self._gallery_seed_log.AddGallerySeeds( ( gallery_seed, ) )
             
             network_job = self._NetworkJobFactory( 'GET', url )
             
             network_job.OverrideBandwidth( 30 )
             
             HG.client_controller.network_engine.AddJob( network_job )
             
             with self._PageNetworkJobPresentationContextFactory( network_job ):
                 
                 network_job.WaitUntilDone()
                 
             
             parsing_text = network_job.GetContentText()
             
             #
             
             parsing_context = {}
             
             parsing_context[ 'url' ] = url
             
             parsing_formula = simple_downloader_formula.GetFormula()
             
             file_seeds = []
             
             for parsed_text in parsing_formula.Parse( parsing_context, parsing_text ):
                 
                 try:
                     
                     file_url = urllib.parse.urljoin( url, parsed_text )
                     
                     file_seed = ClientImportFileSeeds.FileSeed( ClientImportFileSeeds.FILE_SEED_TYPE_URL, file_url )
                     
                     file_seed.SetReferralURL( url )
                     
                     file_seeds.append( file_seed )
                     
                 except:
                     
                     continue
                     
                 
             
             num_new = self._file_seed_cache.AddFileSeeds( file_seeds )
             
             if num_new > 0:
                 
                 ClientImporting.WakeRepeatingJob( self._files_repeating_job )
                 
             
             parser_status = 'page checked OK with formula "' + simple_downloader_formula.GetName() + '" - ' + HydrusData.ToHumanInt( num_new ) + ' new urls'
             
             num_already_in_file_seed_cache = len( file_seeds ) - num_new
             
             if num_already_in_file_seed_cache > 0:
                 
                 parser_status += ' (' + HydrusData.ToHumanInt( num_already_in_file_seed_cache ) + ' already in queue)'
                 
             
             gallery_seed_status = CC.STATUS_SUCCESSFUL_AND_NEW
             
         except HydrusExceptions.ShutdownException:
             
             gallery_seed_status = CC.STATUS_VETOED
             parser_status = 'program is shutting down'
             
             return
             
         except HydrusExceptions.NotFoundException:
             
             gallery_seed_status = CC.STATUS_VETOED
             
             error_occurred = True
             
             parser_status = 'page 404'
             
         except HydrusExceptions.NetworkException as e:
             
             delay = HG.client_controller.new_options.GetInteger( 'downloader_network_error_delay' )
             
             self._DelayWork( delay, str( e ) )
             
             gallery_seed_status = CC.STATUS_ERROR
             error_occurred = True
             
             parser_status = str( e )
             
             HydrusData.PrintException( e )
             
         except Exception as e:
             
             gallery_seed_status = CC.STATUS_ERROR
             
             error_occurred = True
             
             parser_status = str( e )
             
         finally:
             
             gallery_seed_note = parser_status
             
             gallery_seed.SetStatus( gallery_seed_status, note = gallery_seed_note )
             
             self._gallery_seed_log.NotifyGallerySeedsUpdated( ( gallery_seed, ) )
             
         
         with self._lock:
             
             self._gallery_status = ClientImportControl.NeatenStatusText( parser_status )
             
         
         if error_occurred:
             
             time.sleep( 5 )
             
         
         return True
         
     else:
         
         with self._lock:
             
             self._gallery_status = ''
             
         
         return False
Example #13
0
    def _UpdateSerialisableInfo(self, version, old_serialisable_info):

        if version == 1:

            (url, serialisable_file_seed_cache, urls_to_filenames,
             urls_to_md5_base64, serialisable_file_import_options,
             serialisable_tag_import_options, times_to_check, check_period,
             last_check_time, paused) = old_serialisable_info

            checker_options = ClientImportOptions.CheckerOptions(
                intended_files_per_check=8,
                never_faster_than=300,
                never_slower_than=86400,
                death_file_velocity=(1, 86400))

            serialisable_checker_options = checker_options.GetSerialisableTuple(
            )

            files_paused = paused
            checking_paused = paused

            new_serialisable_info = (url, serialisable_file_seed_cache,
                                     urls_to_filenames, urls_to_md5_base64,
                                     serialisable_checker_options,
                                     serialisable_file_import_options,
                                     serialisable_tag_import_options,
                                     last_check_time, files_paused,
                                     checking_paused)

            return (2, new_serialisable_info)

        if version == 2:

            (url, serialisable_file_seed_cache, urls_to_filenames,
             urls_to_md5_base64, serialisable_checker_options,
             serialisable_file_import_options, serialisable_tag_import_options,
             last_check_time, files_paused,
             checking_paused) = old_serialisable_info

            checking_status = ClientImporting.CHECKER_STATUS_OK
            subject = 'unknown subject'

            new_serialisable_info = (url, serialisable_file_seed_cache,
                                     urls_to_filenames, urls_to_md5_base64,
                                     serialisable_checker_options,
                                     serialisable_file_import_options,
                                     serialisable_tag_import_options,
                                     last_check_time, files_paused,
                                     checking_paused, checking_status, subject)

            return (3, new_serialisable_info)

        if version == 3:

            (url, serialisable_file_seed_cache, urls_to_filenames,
             urls_to_md5_base64, serialisable_checker_options,
             serialisable_file_import_options, serialisable_tag_import_options,
             last_check_time, files_paused, checking_paused, checking_status,
             subject) = old_serialisable_info

            no_work_until = 0
            no_work_until_reason = ''

            new_serialisable_info = (url, serialisable_file_seed_cache,
                                     urls_to_filenames, urls_to_md5_base64,
                                     serialisable_checker_options,
                                     serialisable_file_import_options,
                                     serialisable_tag_import_options,
                                     last_check_time, files_paused,
                                     checking_paused, checking_status, subject,
                                     no_work_until, no_work_until_reason)

            return (4, new_serialisable_info)

        if version == 4:

            (url, serialisable_file_seed_cache, urls_to_filenames,
             urls_to_md5_base64, serialisable_checker_options,
             serialisable_file_import_options, serialisable_tag_import_options,
             last_check_time, files_paused, checking_paused, checking_status,
             subject, no_work_until,
             no_work_until_reason) = old_serialisable_info

            creation_time = HydrusData.GetNow()

            new_serialisable_info = (url, serialisable_file_seed_cache,
                                     urls_to_filenames, urls_to_md5_base64,
                                     serialisable_checker_options,
                                     serialisable_file_import_options,
                                     serialisable_tag_import_options,
                                     last_check_time, files_paused,
                                     checking_paused, checking_status, subject,
                                     no_work_until, no_work_until_reason,
                                     creation_time)

            return (5, new_serialisable_info)

        if version == 5:

            (url, serialisable_file_seed_cache, urls_to_filenames,
             urls_to_md5_base64, serialisable_checker_options,
             serialisable_file_import_options, serialisable_tag_import_options,
             last_check_time, files_paused, checking_paused, checking_status,
             subject, no_work_until, no_work_until_reason,
             creation_time) = old_serialisable_info

            gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog()

            serialisable_gallery_seed_log = gallery_seed_log.GetSerialisableTuple(
            )

            new_serialisable_info = (url, serialisable_gallery_seed_log,
                                     serialisable_file_seed_cache,
                                     urls_to_filenames, urls_to_md5_base64,
                                     serialisable_checker_options,
                                     serialisable_file_import_options,
                                     serialisable_tag_import_options,
                                     last_check_time, files_paused,
                                     checking_paused, checking_status, subject,
                                     no_work_until, no_work_until_reason,
                                     creation_time)

            return (6, new_serialisable_info)

        if version == 6:

            (url, serialisable_gallery_seed_log, serialisable_file_seed_cache,
             urls_to_filenames, urls_to_md5_base64,
             serialisable_checker_options, serialisable_file_import_options,
             serialisable_tag_import_options, last_check_time, files_paused,
             checking_paused, checking_status, subject, no_work_until,
             no_work_until_reason, creation_time) = old_serialisable_info

            fixed_service_keys_to_tags = ClientTags.ServiceKeysToTags()

            serialisable_fixed_service_keys_to_tags = fixed_service_keys_to_tags.GetSerialisableTuple(
            )

            new_serialisable_info = (url, serialisable_gallery_seed_log,
                                     serialisable_file_seed_cache,
                                     serialisable_fixed_service_keys_to_tags,
                                     serialisable_checker_options,
                                     serialisable_file_import_options,
                                     serialisable_tag_import_options,
                                     last_check_time, files_paused,
                                     checking_paused, checking_status, subject,
                                     no_work_until, no_work_until_reason,
                                     creation_time)

            return (7, new_serialisable_info)
Example #14
0
    def _CheckWatchableURL(self):
        """Run one check of self._url and update the watcher's status and schedule.

        A new gallery seed is created for the url, added to the gallery seed
        log and worked via WorkOnURL. Afterwards the check-now flag is
        cleared, the last check time is stamped and the velocity status, next
        check time and compaction helpers are run. No value is returned.
        """
        def file_seeds_callable(file_seeds):
            # hands parsed file seeds to the shared helper together with this watcher's file seed cache

            return ClientImporting.UpdateFileSeedCacheWithFileSeeds(
                self._file_seed_cache, file_seeds)

        def status_hook(text):
            # stores the first line of text as the watcher status, under the lock

            with self._lock:

                if len(text) > 0:

                    text = text.splitlines()[0]

                self._watcher_status = text

        def title_hook(text):
            # stores the first line of text as the subject, under the lock

            with self._lock:

                if len(text) > 0:

                    text = text.splitlines()[0]

                self._subject = text

        # every check gets its own gallery seed, which is not allowed to generate more pages
        gallery_seed = ClientImportGallerySeeds.GallerySeed(
            self._url, can_generate_more_pages=False)

        gallery_seed.SetFixedServiceKeysToTags(
            self._fixed_service_keys_to_tags)

        self._gallery_seed_log.AddGallerySeeds((gallery_seed, ))

        with self._lock:

            self._watcher_status = 'checking'

        try:

            (num_urls_added, num_urls_already_in_file_seed_cache,
             num_urls_total, result_404, added_new_gallery_pages,
             stop_reason) = gallery_seed.WorkOnURL(
                 'watcher', self._gallery_seed_log, file_seeds_callable,
                 status_hook, title_hook, self._NetworkJobFactory,
                 self._CheckerNetworkJobPresentationContextFactory,
                 self._file_import_options)

            if num_urls_added > 0:

                # new urls were queued, so wake the files job
                ClientImporting.WakeRepeatingJob(self._files_repeating_job)

            if result_404:

                # a 404 pauses further checking and is recorded as the checking status
                with self._lock:

                    self._checking_paused = True

                    self._checking_status = ClientImporting.CHECKER_STATUS_404

            if gallery_seed.status == CC.STATUS_ERROR:

                # the [DEAD] stuff can override watcher status, so let's give a brief time for this to display the error

                with self._lock:

                    self._checking_paused = True

                    self._watcher_status = gallery_seed.note

                time.sleep(5)

        except HydrusExceptions.NetworkException as e:

            # network trouble: delay further work by the configured number, then print the exception
            delay = HG.client_controller.new_options.GetInteger(
                'downloader_network_error_delay')

            self._DelayWork(delay, str(e))

            HydrusData.PrintException(e)

        # the seed's note becomes the status text; it sticks unless the seed ended as successful-and-new
        watcher_status = gallery_seed.note
        watcher_status_should_stick = gallery_seed.status != CC.STATUS_SUCCESSFUL_AND_NEW

        with self._lock:

            if self._check_now:

                self._check_now = False

            self._watcher_status = watcher_status

            self._last_check_time = HydrusData.GetNow()

            self._UpdateFileVelocityStatus()

            self._UpdateNextCheckTime()

            self._Compact()

        if not watcher_status_should_stick:

            # leave the status visible for five seconds before blanking it
            time.sleep(5)

            with self._lock:

                self._watcher_status = ''