Example #1
0
    def __init__(self,
                 paths=None,
                 file_import_options=None,
                 paths_to_tags=None,
                 delete_after_success=None):
        """Initialise an HDD (local file) import job.

        paths: optional list of local file paths to queue. If None, no file
            seed cache is created yet (deserialisation will fill it in).
        file_import_options: import options object applied to these files.
        paths_to_tags: stored as-is; presumably a mapping of path -> tags to
            add on import -- TODO confirm against callers.
        delete_after_success: stored as-is; presumably whether to delete the
            source files once imported -- TODO confirm against callers.
        """

        HydrusSerialisable.SerialisableBase.__init__(self)

        if paths is None:

            self._file_seed_cache = None

        else:

            self._file_seed_cache = ClientImportFileSeeds.FileSeedCache()

            file_seeds = []

            for path in paths:

                file_seed = ClientImportFileSeeds.FileSeed(
                    ClientImportFileSeeds.FILE_SEED_TYPE_HDD, path)

                try:

                    s = os.stat(path)

                    # take the earlier of modified/created time as the source time
                    file_seed.source_time = int(min(s.st_mtime, s.st_ctime))

                except Exception:

                    # best effort: a failed stat just leaves source_time unset
                    # (was a bare except:, which also swallowed SystemExit and
                    # KeyboardInterrupt)
                    pass

                file_seeds.append(file_seed)

            self._file_seed_cache.AddFileSeeds(file_seeds)

        self._file_import_options = file_import_options
        self._paths_to_tags = paths_to_tags
        self._delete_after_success = delete_after_success

        self._current_action = ''
        self._paused = False

        self._lock = threading.Lock()

        self._files_repeating_job = None

        # repaint/refresh when the file seed cache changes
        HG.client_controller.sub(self, 'NotifyFileSeedsUpdated',
                                 'file_seed_cache_file_seeds_updated')
Example #2
0
 def SetTuple( self, name, path, mimes, file_import_options, tag_import_options, tag_service_keys_to_filename_tagging_options, actions, action_locations, period, check_regularly, paused, check_now, show_working_popup, publish_files_to_popup_button, publish_files_to_page ):
     """Replace this import folder's settings wholesale.
     
     Compares the new path and mimes against the current values *before*
     overwriting them, so the seed cache can be reset appropriately.
     """
     
     # a new folder location invalidates the existing queue entirely
     if path != self._path:
         
         self._file_seed_cache = ClientImportFileSeeds.FileSeedCache()
         
     
     # mime filter changed: drop vetoed entries so they can be reconsidered
     # (presumably they were vetoed by the old mime filter -- confirm)
     if set( mimes ) != set( self._mimes ):
         
         self._file_seed_cache.RemoveFileSeedsByStatus( ( CC.STATUS_VETOED, ) )
         
     
     self._name = name
     self._path = path
     self._mimes = mimes
     self._file_import_options = file_import_options
     self._tag_import_options = tag_import_options
     self._tag_service_keys_to_filename_tagging_options = tag_service_keys_to_filename_tagging_options
     self._actions = actions
     self._action_locations = action_locations
     self._period = period
     self._check_regularly = check_regularly
     self._paused = paused
     self._check_now = check_now
     self._show_working_popup = show_working_popup
     self._publish_files_to_popup_button = publish_files_to_popup_button
     self._publish_files_to_page = publish_files_to_page
Example #3
0
    def __init__(self):
        """Initialise an empty downloader with default ('loud') file import
        options and default tag import options."""

        HydrusSerialisable.SerialisableBase.__init__(self)

        self._gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog()
        self._file_seed_cache = ClientImportFileSeeds.FileSeedCache()
        self._file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions(
            'loud')
        self._tag_import_options = ClientImportOptions.TagImportOptions(
            is_default=True)
        self._paused = False

        self._downloader_key = HydrusData.GenerateKey()

        self._lock = threading.Lock()

        # live network jobs, set while a download is in flight
        self._files_network_job = None
        self._gallery_network_job = None

        # repeating background jobs that drive file/gallery work
        self._files_repeating_job = None
        self._gallery_repeating_job = None

        # refresh when either seed container reports changes
        HG.client_controller.sub(self, 'NotifyFileSeedsUpdated',
                                 'file_seed_cache_file_seeds_updated')
        HG.client_controller.sub(self, 'NotifyGallerySeedsUpdated',
                                 'gallery_seed_log_gallery_seeds_updated')
Example #4
0
    def __init__(self):
        """Initialise an empty simple-downloader page with default ('loud')
        file import options and the default parsing formula."""

        HydrusSerialisable.SerialisableBase.__init__(self)

        file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions(
            'loud')

        # (url, simple_downloader_formula) pairs waiting to be parsed
        self._pending_jobs = []
        self._gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog()
        self._file_seed_cache = ClientImportFileSeeds.FileSeedCache()
        self._file_import_options = file_import_options
        self._formula_name = 'all files linked by images in page'
        self._queue_paused = False
        self._files_paused = False

        # human-readable status strings shown in the ui
        self._parser_status = ''
        self._current_action = ''

        self._lock = threading.Lock()

        self._files_network_job = None
        self._page_network_job = None

        self._files_repeating_job = None
        self._queue_repeating_job = None

        HG.client_controller.sub(self, 'NotifyFileSeedsUpdated',
                                 'file_seed_cache_file_seeds_updated')
Example #5
0
def ConvertAllParseResultsToFileSeeds(all_parse_results, source_url,
                                      file_import_options):
    """Convert parse results into URL file seeds, deduplicating across all
    result sets and preserving first-seen order.

    Returns a list of ClientImportFileSeeds.FileSeed objects.
    """

    seen_urls = set()

    file_seeds = []

    for parse_results in all_parse_results:

        desired_urls = ClientParsing.GetURLsFromParseResults(
            parse_results, (HC.URL_TYPE_DESIRED, ), only_get_top_priority=True)

        desired_urls = HydrusData.DedupeList(desired_urls)

        fresh_urls = [
            desired_url for desired_url in desired_urls
            if desired_url not in seen_urls
        ]

        seen_urls.update(fresh_urls)

        # the seeds must be built inside this loop: parse_results (and hence
        # tags) only apply to the urls parsed from this particular result set

        for fresh_url in fresh_urls:

            new_seed = ClientImportFileSeeds.FileSeed(
                ClientImportFileSeeds.FILE_SEED_TYPE_URL, fresh_url)

            new_seed.SetReferralURL(source_url)

            new_seed.AddParseResults(parse_results, file_import_options)

            file_seeds.append(new_seed)

    return file_seeds
Example #6
0
 def _RegenerateStatus( self ):
     """Rebuild the cached aggregate status from every watcher's file seed cache."""
     
     caches = []
     
     for watcher in self._watchers:
         
         caches.append( watcher.GetFileSeedCache() )
         
     
     self._status_cache = ClientImportFileSeeds.GenerateFileSeedCachesStatus( caches )
     
     self._status_cache_generation_time = HydrusData.GetNow()
     self._status_dirty = False
Example #7
0
def THREADDownloadURL(job_key, url, url_string):
    """Download a single url and import it, reporting progress via job_key.

    Runs on a worker thread. job_key is always finished, even on error.
    """

    job_key.SetVariable('popup_title', url_string)
    job_key.SetVariable('popup_text_1', 'downloading and importing')

    #

    file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions(
        'loud')

    def network_job_factory(*args, **kwargs):

        # throttle this one-off download a little
        job = ClientNetworkingJobs.NetworkJob(*args, **kwargs)

        job.OverrideBandwidth(30)

        return job

    network_job_presentation_context_factory = GenerateSinglePopupNetworkJobPresentationContextFactory(
        job_key)

    file_seed = ClientImportFileSeeds.FileSeed(
        ClientImportFileSeeds.FILE_SEED_TYPE_URL, url)

    #

    try:

        file_seed.DownloadAndImportRawFile(
            url, file_import_options, network_job_factory,
            network_job_presentation_context_factory)

        status = file_seed.status

        if status in CC.SUCCESSFUL_IMPORT_STATES:

            if status == CC.STATUS_SUCCESSFUL_AND_NEW:

                job_key.SetVariable('popup_text_1', 'successful!')

            elif status == CC.STATUS_SUCCESSFUL_BUT_REDUNDANT:

                job_key.SetVariable('popup_text_1',
                                    'was already in the database!')

            # surface the imported file in the popup if we know its hash
            if file_seed.HasHash():

                job_key.SetVariable('popup_files',
                                    ([file_seed.GetHash()], 'download'))

        elif status == CC.STATUS_DELETED:

            job_key.SetVariable('popup_text_1', 'had already been deleted!')

    finally:

        job_key.Finish()
Example #8
0
 def __init__( self, name, path = '', file_import_options = None, tag_import_options = None, tag_service_keys_to_filename_tagging_options = None, mimes = None, actions = None, action_locations = None, period = 3600, check_regularly = True, show_working_popup = True, publish_files_to_popup_button = True, publish_files_to_page = False ):
     """Initialise an import folder watching a local path.
     
     Unsupplied options fall back to sensible defaults: all mimes, 'quiet'
     file import options, empty tag options, and 'ignore' actions for every
     import outcome. period is the check interval in seconds.
     """
     
     if mimes is None:
         
         mimes = HC.ALLOWED_MIMES
         
     
     if file_import_options is None:
         
         file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions( 'quiet' )
         
     
     if tag_import_options is None:
         
         tag_import_options = ClientImportOptions.TagImportOptions()
         
     
     if tag_service_keys_to_filename_tagging_options is None:
         
         tag_service_keys_to_filename_tagging_options = {}
         
     
     if actions is None:
         
         # default: take no filesystem action for any import outcome
         actions = {}
         
         actions[ CC.STATUS_SUCCESSFUL_AND_NEW ] = CC.IMPORT_FOLDER_IGNORE
         actions[ CC.STATUS_SUCCESSFUL_BUT_REDUNDANT ] = CC.IMPORT_FOLDER_IGNORE
         actions[ CC.STATUS_DELETED ] = CC.IMPORT_FOLDER_IGNORE
         actions[ CC.STATUS_ERROR ] = CC.IMPORT_FOLDER_IGNORE
         
     
     if action_locations is None:
         
         action_locations = {}
         
     
     HydrusSerialisable.SerialisableBaseNamed.__init__( self, name )
     
     self._path = path
     self._mimes = mimes
     self._file_import_options = file_import_options
     self._tag_import_options = tag_import_options
     self._tag_service_keys_to_filename_tagging_options = tag_service_keys_to_filename_tagging_options
     self._actions = actions
     self._action_locations = action_locations
     self._period = period
     self._check_regularly = check_regularly
     
     # runtime state, not taken from parameters
     self._file_seed_cache = ClientImportFileSeeds.FileSeedCache()
     self._last_checked = 0
     self._paused = False
     self._check_now = False
     
     self._show_working_popup = show_working_popup
     self._publish_files_to_popup_button = publish_files_to_popup_button
     self._publish_files_to_page = publish_files_to_page
    def __init__(self):
        """Initialise an empty watcher with default checker, file import and
        tag import options."""

        HydrusSerialisable.SerialisableBase.__init__(self)

        self._page_key = 'initialising page key'
        self._publish_to_page = False

        self._url = ''

        self._gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog()
        self._file_seed_cache = ClientImportFileSeeds.FileSeedCache()

        self._urls_to_filenames = {}
        self._urls_to_md5_base64 = {}
        self._checker_options = HG.client_controller.new_options.GetDefaultWatcherCheckerOptions(
        )
        self._file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions(
            'loud')
        self._tag_import_options = ClientImportOptions.TagImportOptions(
            is_default=True)
        self._last_check_time = 0
        self._checking_status = ClientImporting.CHECKER_STATUS_OK
        self._subject = 'unknown subject'

        self._next_check_time = None

        # ui hooks for attaching/detaching download controls
        self._download_control_file_set = None
        self._download_control_file_clear = None
        self._download_control_checker_set = None
        self._download_control_checker_clear = None

        self._check_now = False
        self._files_paused = False
        self._checking_paused = False

        # backoff state: no work until this time, for this reason
        self._no_work_until = 0
        self._no_work_until_reason = ''

        self._creation_time = HydrusData.GetNow()

        # human-readable status strings shown in the ui
        self._file_velocity_status = ''
        self._current_action = ''
        self._watcher_status = ''

        self._watcher_key = HydrusData.GenerateKey()

        self._lock = threading.Lock()

        self._last_pubbed_page_name = ''

        self._files_repeating_job = None
        self._checker_repeating_job = None

        HG.client_controller.sub(self, 'NotifyFileSeedsUpdated',
                                 'file_seed_cache_file_seeds_updated')
Example #10
0
    def _RegenerateStatus(self):
        """Rebuild the cached aggregate status from every gallery import's
        file seed cache."""

        caches = [g.GetFileSeedCache() for g in self._gallery_imports]

        self._status_cache = ClientImportFileSeeds.GenerateFileSeedCachesStatus(
            caches)

        self._status_cache_generation_time = HydrusData.GetNow()
        self._status_dirty = False
Example #11
0
def UpdateFileSeedCacheWithAllParseResults(file_seed_cache,
                                           all_parse_results,
                                           source_url,
                                           max_new_urls_allowed=None):
    """Add file seeds for parsed urls to file_seed_cache, up to an optional
    cap on newly added urls.

    Returns (num_urls_added, num_urls_already_in_file_seed_cache,
    num_urls_total). num_urls_total counts every parsed url, including those
    skipped once the cap is reached.
    """

    new_file_seeds = []

    num_urls_added = 0
    num_urls_already_in_file_seed_cache = 0
    num_urls_total = 0

    for parse_results in all_parse_results:

        desired_urls = ClientParsing.GetURLsFromParseResults(
            parse_results, (HC.URL_TYPE_DESIRED, ), only_get_top_priority=True)

        for desired_url in desired_urls:

            num_urls_total += 1

            hit_the_cap = (max_new_urls_allowed is not None
                           and num_urls_added == max_new_urls_allowed)

            if hit_the_cap:

                # keep counting totals, but add nothing more
                continue

            candidate = ClientImportFileSeeds.FileSeed(
                ClientImportFileSeeds.FILE_SEED_TYPE_URL, desired_url)

            candidate.SetReferralURL(source_url)

            if file_seed_cache.HasFileSeed(candidate):

                num_urls_already_in_file_seed_cache += 1

            else:

                num_urls_added += 1

                candidate.AddParseResults(parse_results)

                new_file_seeds.append(candidate)

    file_seed_cache.AddFileSeeds(new_file_seeds)

    return (num_urls_added, num_urls_already_in_file_seed_cache,
            num_urls_total)
Example #12
0
    def _ImportSources(self, sources):

        file_seed_cache = self._file_seed_cache_get_callable()

        if sources[0].startswith('http'):

            file_seed_type = ClientImportFileSeeds.FILE_SEED_TYPE_URL

        else:

            file_seed_type = ClientImportFileSeeds.FILE_SEED_TYPE_HDD

        file_seeds = [
            ClientImportFileSeeds.FileSeed(file_seed_type, source)
            for source in sources
        ]

        file_seed_cache.AddFileSeeds(file_seeds)
Example #13
0
    def PendURLs(self, urls):
        """Queue urls for this importer: file/post urls go to the file seed
        cache, everything else becomes a gallery seed. Wakes the relevant
        repeating job when anything was added."""

        with self._lock:

            # > _1_ to take out the occasional whitespace
            urls = [url for url in urls if len(url) > 1]

            file_seeds = []

            gallery_seeds = []

            for url in urls:

                url_match = HG.client_controller.network_engine.domain_manager.GetURLMatch(
                    url)

                looks_like_a_file = url_match is None or url_match.GetURLType(
                ) in (HC.URL_TYPE_FILE, HC.URL_TYPE_POST)

                if looks_like_a_file:

                    file_seeds.append(
                        ClientImportFileSeeds.FileSeed(
                            ClientImportFileSeeds.FILE_SEED_TYPE_URL, url))

                else:

                    # single pages only; do not let these spawn more pages
                    gallery_seeds.append(
                        ClientImportGallerySeeds.GallerySeed(
                            url, can_generate_more_pages=False))

            if len(gallery_seeds) > 0:

                self._gallery_seed_log.AddGallerySeeds(gallery_seeds)

                ClientImporting.WakeRepeatingJob(self._gallery_repeating_job)

            if len(file_seeds) > 0:

                self._file_seed_cache.AddFileSeeds(file_seeds)

                ClientImporting.WakeRepeatingJob(self._files_repeating_job)
Example #14
0
 def GetPage( self, gallery_url ):
     """Fetch and parse one gallery page.
     
     Returns ( file_seeds, definitely_no_more_pages ).
     """
     
     data = self._FetchData( gallery_url )
     
     ( page_of_urls_and_tags, definitely_no_more_pages ) = self._ParseGalleryPage( data, gallery_url )
     
     # local import -- presumably dodging a circular import at module load; confirm
     import ClientImportFileSeeds
     
     page_of_file_seeds = []
     
     for ( file_url, file_tags ) in page_of_urls_and_tags:
         
         seed = ClientImportFileSeeds.FileSeed( ClientImportFileSeeds.FILE_SEED_TYPE_URL, file_url )
         
         seed.AddTags( file_tags )
         
         page_of_file_seeds.append( seed )
         
     
     return ( page_of_file_seeds, definitely_no_more_pages )
Example #15
0
    def _CheckFolder(self, job_key):
        """Scan self._path for files not yet in the seed cache and queue them.

        Progress is reported via job_key; the scan stops early if the job is
        cancelled. Updates the last-checked timestamp on completion.
        """

        filenames = os.listdir(HydrusData.ToUnicode(self._path))

        raw_paths = [
            os.path.join(self._path, filename) for filename in filenames
        ]

        checkable_paths = HydrusPaths.FilterFreePaths(
            ClientFiles.GetAllPaths(raw_paths))

        new_file_seeds = []

        for checkable_path in checkable_paths:

            if job_key.IsCancelled():

                break

            # skip .txt files -- presumably tag sidecars, not importable media
            if checkable_path.endswith('.txt'):

                continue

            candidate = ClientImportFileSeeds.FileSeed(
                ClientImportFileSeeds.FILE_SEED_TYPE_HDD, checkable_path)

            if not self._file_seed_cache.HasFileSeed(candidate):

                new_file_seeds.append(candidate)

            job_key.SetVariable(
                'popup_text_1', 'checking: found ' +
                HydrusData.ToHumanInt(len(new_file_seeds)) + ' new files')

        self._file_seed_cache.AddFileSeeds(new_file_seeds)

        self._last_checked = HydrusData.GetNow()
        self._check_now = False
Example #16
0
    def _WorkOnQueue(self, page_key):
        """Pop one pending (url, formula) job, fetch the page, parse file urls
        out of it and queue them as file seeds.

        Returns True if a job was worked on (so the caller should come back),
        False if the queue was empty. Returns None early on shutdown.
        """

        if len(self._pending_jobs) > 0:

            with self._lock:

                (url, simple_downloader_formula) = self._pending_jobs.pop(0)

                self._parser_status = 'checking ' + url

            error_occurred = False

            try:

                # log this page fetch as a gallery seed so the user can see it
                gallery_seed = ClientImportGallerySeeds.GallerySeed(
                    url, can_generate_more_pages=False)

                self._gallery_seed_log.AddGallerySeeds((gallery_seed, ))

                # NOTE(review): if anything above in this try raised, the
                # finally below would hit unbound gallery_seed/parser_status
                # -- confirm this cannot happen in practice

                network_job = ClientNetworkingJobs.NetworkJobDownloader(
                    page_key, 'GET', url)

                network_job.OverrideBandwidth(30)

                HG.client_controller.network_engine.AddJob(network_job)

                with self._PageNetworkJobPresentationContextFactory(
                        network_job):

                    network_job.WaitUntilDone()

                data = network_job.GetContent()

                #

                parsing_context = {}

                parsing_context['url'] = url

                parsing_formula = simple_downloader_formula.GetFormula()

                file_seeds = []

                for parsed_text in parsing_formula.Parse(
                        parsing_context, data):

                    try:

                        # parsed links may be relative to the page url
                        file_url = urlparse.urljoin(url, parsed_text)

                        file_seed = ClientImportFileSeeds.FileSeed(
                            ClientImportFileSeeds.FILE_SEED_TYPE_URL, file_url)

                        file_seed.SetReferralURL(url)

                        file_seeds.append(file_seed)

                    except:

                        # best effort: skip any parsed text that will not make a url
                        continue

                num_new = self._file_seed_cache.AddFileSeeds(file_seeds)

                if num_new > 0:

                    ClientImporting.WakeRepeatingJob(self._files_repeating_job)

                parser_status = 'page checked OK with formula "' + simple_downloader_formula.GetName(
                ) + '" - ' + HydrusData.ToHumanInt(num_new) + ' new urls'

                num_already_in_file_seed_cache = len(file_seeds) - num_new

                if num_already_in_file_seed_cache > 0:

                    parser_status += ' (' + HydrusData.ToHumanInt(
                        num_already_in_file_seed_cache) + ' already in queue)'

                gallery_seed_status = CC.STATUS_SUCCESSFUL_AND_NEW

            except HydrusExceptions.ShutdownException:

                gallery_seed_status = CC.STATUS_VETOED
                parser_status = 'program is shutting down'

                # the finally clause still records the status before we leave
                return

            except HydrusExceptions.NotFoundException:

                gallery_seed_status = CC.STATUS_VETOED

                error_occurred = True

                parser_status = 'page 404'

            except Exception as e:

                gallery_seed_status = CC.STATUS_ERROR

                error_occurred = True

                parser_status = HydrusData.ToUnicode(e)

            finally:

                gallery_seed_note = parser_status

                gallery_seed.SetStatus(gallery_seed_status,
                                       note=gallery_seed_note)

                self._gallery_seed_log.NotifyGallerySeedsUpdated(
                    (gallery_seed, ))

            with self._lock:

                self._parser_status = parser_status

            if error_occurred:

                # brief pause so a failing queue does not spin hot
                time.sleep(5)

            return True

        else:

            with self._lock:

                self._parser_status = ''

            return False
Example #17
0
def THREADDownloadURLs(job_key, urls, title):
    """Download and import a batch of urls, reporting progress and a final
    summary via the job_key popup.

    Runs on a worker thread. Individual failures are logged and counted but
    do not stop the batch.
    """

    job_key.SetVariable('popup_title', title)
    job_key.SetVariable('popup_text_1', 'initialising')

    # outcome counters for the final summary line
    num_successful = 0
    num_redundant = 0
    num_deleted = 0
    num_failed = 0

    # ordered list of hashes to show, with a set for fast dedupe
    presentation_hashes = []
    presentation_hashes_fast = set()

    file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions(
        'loud')

    def network_job_factory(*args, **kwargs):

        network_job = ClientNetworkingJobs.NetworkJob(*args, **kwargs)

        network_job.OverrideBandwidth()

        return network_job

    network_job_presentation_context_factory = GenerateMultiplePopupNetworkJobPresentationContextFactory(
        job_key)

    for (i, url) in enumerate(urls):

        # honour user pause/cancel between downloads
        (i_paused, should_quit) = job_key.WaitIfNeeded()

        if should_quit:

            break

        job_key.SetVariable(
            'popup_text_1',
            HydrusData.ConvertValueRangeToPrettyString(i + 1, len(urls)))
        job_key.SetVariable('popup_gauge_1', (i + 1, len(urls)))

        file_seed = ClientImportFileSeeds.FileSeed(
            ClientImportFileSeeds.FILE_SEED_TYPE_URL, url)

        try:

            file_seed.DownloadAndImportRawFile(
                url, file_import_options, network_job_factory,
                network_job_presentation_context_factory)

            status = file_seed.status

            if status in CC.SUCCESSFUL_IMPORT_STATES:

                if status == CC.STATUS_SUCCESSFUL_AND_NEW:

                    num_successful += 1

                elif status == CC.STATUS_SUCCESSFUL_BUT_REDUNDANT:

                    num_redundant += 1

                if file_seed.HasHash():

                    hash = file_seed.GetHash()

                    if hash not in presentation_hashes_fast:

                        presentation_hashes.append(hash)

                    presentation_hashes_fast.add(hash)

            elif status == CC.STATUS_DELETED:

                num_deleted += 1

        except Exception as e:

            # one bad url should not kill the batch; log and move on
            num_failed += 1

            HydrusData.Print(url + ' failed to import!')
            HydrusData.PrintException(e)

    job_key.DeleteVariable('popup_network_job')

    # assemble the summary line from the non-zero counters
    text_components = []

    if num_successful > 0:

        text_components.append(
            HydrusData.ToHumanInt(num_successful) + ' successful')

    if num_redundant > 0:

        text_components.append(
            HydrusData.ToHumanInt(num_redundant) + ' already in db')

    if num_deleted > 0:

        text_components.append(HydrusData.ToHumanInt(num_deleted) + ' deleted')

    if num_failed > 0:

        text_components.append(
            HydrusData.ToHumanInt(num_failed) +
            ' failed (errors written to log)')

    job_key.SetVariable('popup_text_1', ', '.join(text_components))

    if len(presentation_hashes) > 0:

        job_key.SetVariable('popup_files', (presentation_hashes, 'downloads'))

    job_key.DeleteVariable('popup_gauge_1')

    job_key.Finish()
Example #18
0
def DAEMONDownloadFiles( controller ):
    """Background daemon pass: fetch every pending 'downloads' hash from a
    remote service that has it (file repository or IPFS) and import it
    locally, reporting progress in a popup.
    """
    
    hashes = controller.Read( 'downloads' )
    
    num_downloads = len( hashes )
    
    if num_downloads > 0:
        
        client_files_manager = controller.client_files_manager
        
        successful_hashes = set()
        
        job_key = ClientThreading.JobKey()
        
        job_key.SetVariable( 'popup_text_1', 'initialising downloader' )
        
        controller.pub( 'message', job_key )
        
        for hash in hashes:
            
            job_key.SetVariable( 'popup_text_1', 'downloading ' + HydrusData.ToHumanInt( num_downloads - len( successful_hashes ) ) + ' files from repositories' )
            
            ( media_result, ) = controller.Read( 'media_results', ( hash, ) )
            
            service_keys = list( media_result.GetLocationsManager().GetCurrent() )
            
            # try the candidate services in random order to spread load
            random.shuffle( service_keys )
            
            for service_key in service_keys:
                
                # already local: nothing to do; trash: not a download source
                if service_key == CC.LOCAL_FILE_SERVICE_KEY: break
                elif service_key == CC.TRASH_SERVICE_KEY: continue
                
                try:
                    
                    service = controller.services_manager.GetService( service_key )
                    
                except:
                    
                    # unknown/removed service; try the next one
                    continue
                    
                
                if service.GetServiceType() == HC.FILE_REPOSITORY:
                    
                    file_repository = service
                    
                    if file_repository.IsFunctional():
                        
                        try:
                            
                            ( os_file_handle, temp_path ) = ClientPaths.GetTempPath()
                            
                            try:
                                
                                file_repository.Request( HC.GET, 'file', { 'hash' : hash }, temp_path = temp_path )
                                
                                controller.WaitUntilModelFree()
                                
                                exclude_deleted = False # the important part: do not skip files the user once deleted -- they were explicitly requested
                                allow_decompression_bombs = True
                                min_size = None
                                max_size = None
                                max_gif_size = None
                                min_resolution = None
                                max_resolution = None
                                automatic_archive = False
                                
                                file_import_options = ClientImportOptions.FileImportOptions()
                                
                                file_import_options.SetPreImportOptions( exclude_deleted, allow_decompression_bombs, min_size, max_size, max_gif_size, min_resolution, max_resolution )
                                file_import_options.SetPostImportOptions( automatic_archive )
                                
                                file_import_job = ClientImportFileSeeds.FileImportJob( temp_path, file_import_options )
                                
                                client_files_manager.ImportFile( file_import_job )
                                
                                successful_hashes.add( hash )
                                
                                # got it; no need to try other services for this hash
                                break
                                
                            finally:
                                
                                HydrusPaths.CleanUpTempPath( os_file_handle, temp_path )
                                
                            
                        except HydrusExceptions.ServerBusyException:
                            
                            job_key.SetVariable( 'popup_text_1', file_repository.GetName() + ' was busy. waiting 30s before trying again' )
                            
                            time.sleep( 30 )
                            
                            job_key.Delete()
                            
                            # reschedule the whole pass rather than hammer a busy server
                            controller.pub( 'notify_new_downloads' )
                            
                            return
                            
                        except Exception as e:
                            
                            HydrusData.ShowText( 'Error downloading file!' )
                            HydrusData.ShowException( e )
                            
                        
                    
                elif service.GetServiceType() == HC.IPFS:
                    
                    multihashes = HG.client_controller.Read( 'service_filenames', service_key, { hash } )
                    
                    if len( multihashes ) > 0:
                        
                        multihash = multihashes[0]
                        
                        # this actually calls to a thread that can launch gui 'select from tree' stuff, so let's just break at this point
                        service.ImportFile( multihash )
                        
                        break
                        
                    
                
                if HydrusThreading.IsThreadShuttingDown():
                    
                    return
                    
                
            
        
        if len( successful_hashes ) > 0:
            
            job_key.SetVariable( 'popup_text_1', HydrusData.ToHumanInt( len( successful_hashes ) ) + ' files downloaded' )
            
        
        job_key.Delete()
Example #19
0
    def test_checker_options(self):

        regular_checker_options = ClientImportOptions.CheckerOptions(
            intended_files_per_check=5,
            never_faster_than=30,
            never_slower_than=86400,
            death_file_velocity=(1, 86400))
        fast_checker_options = ClientImportOptions.CheckerOptions(
            intended_files_per_check=2,
            never_faster_than=30,
            never_slower_than=86400,
            death_file_velocity=(1, 86400))
        slow_checker_options = ClientImportOptions.CheckerOptions(
            intended_files_per_check=10,
            never_faster_than=30,
            never_slower_than=86400,
            death_file_velocity=(1, 86400))
        callous_checker_options = ClientImportOptions.CheckerOptions(
            intended_files_per_check=5,
            never_faster_than=30,
            never_slower_than=86400,
            death_file_velocity=(1, 60))

        # NOTE(review): this is the tail of a test method whose opening (the
        # def line and the regular/fast/slow/callous checker option objects)
        # is above this chunk — the checker option names used below are
        # presumably constructed there; confirm against the full file.

        # cache with no file seeds at all
        empty_file_seed_cache = ClientImportFileSeeds.FileSeedCache()

        # cache with a steady 50-files-per-day history (built below)
        file_seed_cache = ClientImportFileSeeds.FileSeedCache()

        # fixed 'now' for the test so velocity windows are deterministic
        last_check_time = 10000000

        # 86400s = one day; seeds placed around this boundary exercise the
        # one-day velocity window
        one_day_before = last_check_time - 86400

        # 50 seeds just OUTSIDE the trailing day window
        for i in range(50):

            # .encode('hex') is Python 2 only — on py3 this would be
            # os.urandom(16).hex(); assumes this file still targets py2
            url = 'https://wew.lad/' + os.urandom(16).encode('hex')

            file_seed = ClientImportFileSeeds.FileSeed(
                ClientImportFileSeeds.FILE_SEED_TYPE_URL, url)

            file_seed.source_time = one_day_before - 10

            file_seed_cache.AddFileSeeds((file_seed, ))

        # 50 seeds INSIDE the trailing day window (10 min before last check)
        for i in range(50):

            url = 'https://wew.lad/' + os.urandom(16).encode('hex')

            file_seed = ClientImportFileSeeds.FileSeed(
                ClientImportFileSeeds.FILE_SEED_TYPE_URL, url)

            file_seed.source_time = last_check_time - 600

            file_seed_cache.AddFileSeeds((file_seed, ))

        # cache with only two seeds: one just outside the day window, one
        # just inside — i.e. a ~1 file/day source
        bare_file_seed_cache = ClientImportFileSeeds.FileSeedCache()

        url = 'https://wew.lad/' + 'early'

        file_seed = ClientImportFileSeeds.FileSeed(
            ClientImportFileSeeds.FILE_SEED_TYPE_URL, url)

        file_seed.source_time = one_day_before - 10

        bare_file_seed_cache.AddFileSeeds((file_seed, ))

        url = 'https://wew.lad/' + 'in_time_delta'

        file_seed = ClientImportFileSeeds.FileSeed(
            ClientImportFileSeeds.FILE_SEED_TYPE_URL, url)

        file_seed.source_time = one_day_before + 10

        bare_file_seed_cache.AddFileSeeds((file_seed, ))

        # cache simulating a very active source: one early seed plus 8640
        # seeds spread every 10s across the last day (= 6 per minute)
        busy_file_seed_cache = ClientImportFileSeeds.FileSeedCache()

        url = 'https://wew.lad/' + 'early'

        file_seed = ClientImportFileSeeds.FileSeed(
            ClientImportFileSeeds.FILE_SEED_TYPE_URL, url)

        file_seed.source_time = one_day_before - 10

        busy_file_seed_cache.AddFileSeeds((file_seed, ))

        for i in range(8640):

            url = 'https://wew.lad/' + os.urandom(16).encode('hex')

            file_seed = ClientImportFileSeeds.FileSeed(
                ClientImportFileSeeds.FILE_SEED_TYPE_URL, url)

            # evenly spaced: seed i lands at one_day_before + 10*(i+1) - 1
            file_seed.source_time = one_day_before + ((i + 1) * 10) - 1

            busy_file_seed_cache.AddFileSeeds((file_seed, ))

        # cache simulating a brand-new thread: 10 seeds, all 10 min old
        new_thread_file_seed_cache = ClientImportFileSeeds.FileSeedCache()

        for i in range(10):

            url = 'https://wew.lad/' + os.urandom(16).encode('hex')

            file_seed = ClientImportFileSeeds.FileSeed(
                ClientImportFileSeeds.FILE_SEED_TYPE_URL, url)

            file_seed.source_time = last_check_time - 600

            new_thread_file_seed_cache.AddFileSeeds((file_seed, ))

        # empty
        # should say ok if last_check_time is 0, so it can initialise
        # otherwise sperg out safely

        self.assertFalse(
            regular_checker_options.IsDead(empty_file_seed_cache, 0))

        self.assertEqual(
            regular_checker_options.GetPrettyCurrentVelocity(
                empty_file_seed_cache, 0), 'no files yet')

        self.assertEqual(
            regular_checker_options.GetNextCheckTime(empty_file_seed_cache, 0,
                                                     0), 0)

        # once a check has happened, an empty cache counts as dead
        self.assertTrue(
            regular_checker_options.IsDead(empty_file_seed_cache,
                                           last_check_time))

        self.assertEqual(
            regular_checker_options.GetPrettyCurrentVelocity(
                empty_file_seed_cache, last_check_time),
            'no files, unable to determine velocity')

        # regular
        # current velocity should be 50 files per day for the day ones and 0 files per min for the callous minute one

        self.assertFalse(
            regular_checker_options.IsDead(file_seed_cache, last_check_time))
        self.assertFalse(
            fast_checker_options.IsDead(file_seed_cache, last_check_time))
        self.assertFalse(
            slow_checker_options.IsDead(file_seed_cache, last_check_time))
        # callous options use a one-minute death window, and no seeds fall
        # within the last minute, so this cache reads as dead to it
        self.assertTrue(
            callous_checker_options.IsDead(file_seed_cache, last_check_time))

        self.assertEqual(
            regular_checker_options.GetPrettyCurrentVelocity(
                file_seed_cache, last_check_time),
            u'at last check, found 50 files in previous 1 day')
        self.assertEqual(
            fast_checker_options.GetPrettyCurrentVelocity(
                file_seed_cache, last_check_time),
            u'at last check, found 50 files in previous 1 day')
        self.assertEqual(
            slow_checker_options.GetPrettyCurrentVelocity(
                file_seed_cache, last_check_time),
            u'at last check, found 50 files in previous 1 day')
        self.assertEqual(
            callous_checker_options.GetPrettyCurrentVelocity(
                file_seed_cache, last_check_time),
            u'at last check, found 0 files in previous 1 minute')

        # 86400s/day over 50 files/day scaled by intended files per check:
        # regular -> +8640s, fast -> +3456s, slow -> +17280s
        self.assertEqual(
            regular_checker_options.GetNextCheckTime(file_seed_cache,
                                                     last_check_time, 0),
            last_check_time + 8640)
        self.assertEqual(
            fast_checker_options.GetNextCheckTime(file_seed_cache,
                                                  last_check_time, 0),
            last_check_time + 3456)
        self.assertEqual(
            slow_checker_options.GetNextCheckTime(file_seed_cache,
                                                  last_check_time, 0),
            last_check_time + 17280)

        # bare
        # 1 files per day

        self.assertFalse(
            regular_checker_options.IsDead(bare_file_seed_cache,
                                           last_check_time))
        self.assertTrue(
            callous_checker_options.IsDead(bare_file_seed_cache,
                                           last_check_time))

        self.assertEqual(
            regular_checker_options.GetPrettyCurrentVelocity(
                bare_file_seed_cache, last_check_time),
            u'at last check, found 1 files in previous 1 day')

        # at 1 file/day, every option clamps to the full day period
        self.assertEqual(
            regular_checker_options.GetNextCheckTime(bare_file_seed_cache,
                                                     last_check_time, 0),
            last_check_time + 86400)
        self.assertEqual(
            fast_checker_options.GetNextCheckTime(bare_file_seed_cache,
                                                  last_check_time, 0),
            last_check_time + 86400)
        self.assertEqual(
            slow_checker_options.GetNextCheckTime(bare_file_seed_cache,
                                                  last_check_time, 0),
            last_check_time + 86400)

        # busy
        # 8640 files per day, 6 files per minute

        self.assertFalse(
            regular_checker_options.IsDead(busy_file_seed_cache,
                                           last_check_time))
        self.assertFalse(
            fast_checker_options.IsDead(busy_file_seed_cache, last_check_time))
        self.assertFalse(
            slow_checker_options.IsDead(busy_file_seed_cache, last_check_time))
        self.assertFalse(
            callous_checker_options.IsDead(busy_file_seed_cache,
                                           last_check_time))

        self.assertEqual(
            regular_checker_options.GetPrettyCurrentVelocity(
                busy_file_seed_cache, last_check_time),
            u'at last check, found 8,640 files in previous 1 day')
        self.assertEqual(
            callous_checker_options.GetPrettyCurrentVelocity(
                busy_file_seed_cache, last_check_time),
            u'at last check, found 6 files in previous 1 minute')

        # high velocity pushes the next-check interval down to the options'
        # never-faster-than floor (50/30/100/50 seconds respectively)
        self.assertEqual(
            regular_checker_options.GetNextCheckTime(busy_file_seed_cache,
                                                     last_check_time, 0),
            last_check_time + 50)
        self.assertEqual(
            fast_checker_options.GetNextCheckTime(busy_file_seed_cache,
                                                  last_check_time, 0),
            last_check_time + 30)
        self.assertEqual(
            slow_checker_options.GetNextCheckTime(busy_file_seed_cache,
                                                  last_check_time, 0),
            last_check_time + 100)
        self.assertEqual(
            callous_checker_options.GetNextCheckTime(busy_file_seed_cache,
                                                     last_check_time, 0),
            last_check_time + 50)

        # new thread
        # only had files from ten mins ago, so timings are different

        self.assertFalse(
            regular_checker_options.IsDead(new_thread_file_seed_cache,
                                           last_check_time))
        self.assertFalse(
            fast_checker_options.IsDead(new_thread_file_seed_cache,
                                        last_check_time))
        self.assertFalse(
            slow_checker_options.IsDead(new_thread_file_seed_cache,
                                        last_check_time))
        self.assertTrue(
            callous_checker_options.IsDead(new_thread_file_seed_cache,
                                           last_check_time))

        self.assertEqual(
            regular_checker_options.GetPrettyCurrentVelocity(
                new_thread_file_seed_cache, last_check_time),
            u'at last check, found 10 files in previous 10 minutes')
        self.assertEqual(
            fast_checker_options.GetPrettyCurrentVelocity(
                new_thread_file_seed_cache, last_check_time),
            u'at last check, found 10 files in previous 10 minutes')
        self.assertEqual(
            slow_checker_options.GetPrettyCurrentVelocity(
                new_thread_file_seed_cache, last_check_time),
            u'at last check, found 10 files in previous 10 minutes')
        self.assertEqual(
            callous_checker_options.GetPrettyCurrentVelocity(
                new_thread_file_seed_cache, last_check_time),
            u'at last check, found 0 files in previous 1 minute')

        # these would be 360, 120, 600, but the 'don't check faster the time since last file post' bumps this up
        self.assertEqual(
            regular_checker_options.GetNextCheckTime(
                new_thread_file_seed_cache, last_check_time, 0),
            last_check_time + 600)
        self.assertEqual(
            fast_checker_options.GetNextCheckTime(new_thread_file_seed_cache,
                                                  last_check_time, 0),
            last_check_time + 600)
        self.assertEqual(
            slow_checker_options.GetNextCheckTime(new_thread_file_seed_cache,
                                                  last_check_time, 0),
            last_check_time + 600)

        # Let's test these new static timings, where if faster_than == slower_than, we just add that period to the 'last_next_check_time' (e.g. checking every sunday night)

        static_checker_options = ClientImportOptions.CheckerOptions(
            intended_files_per_check=5,
            never_faster_than=3600,
            never_slower_than=3600,
            death_file_velocity=(1, 3600))

        # bare cache has <1 file/hour, so it is dead under the static options
        self.assertTrue(
            static_checker_options.IsDead(bare_file_seed_cache,
                                          last_check_time))

        last_next_check_time = last_check_time - 200

        # pin 'now' so the static schedule computation is deterministic
        with patch.object(HydrusData,
                          'GetNow',
                          return_value=last_check_time + 10):

            # static period is added to the previous scheduled time, not to
            # last_check_time
            self.assertEqual(
                static_checker_options.GetNextCheckTime(
                    new_thread_file_seed_cache, last_check_time,
                    last_next_check_time), last_next_check_time + 3600)
Example #20
0
    def __init__(self,
                 query=None,
                 source_name=None,
                 initial_search_urls=None,
                 start_file_queue_paused=False,
                 start_gallery_queue_paused=False):
        """Initialise a gallery import job for the given search query.

        Any initial search urls are wrapped in gallery seeds and queued in a
        fresh gallery seed log; the file queue starts empty. The file and
        gallery workers start paused according to the two flags.
        """

        # fall back to harmless defaults rather than using mutable/None state
        query = 'samus_aran' if query is None else query
        source_name = 'unknown' if source_name is None else source_name
        initial_search_urls = [] if initial_search_urls is None else initial_search_urls

        HydrusSerialisable.SerialisableBase.__init__(self)

        # identity and provenance
        self._creation_time = HydrusData.GetNow()
        self._gallery_import_key = HydrusData.GenerateKey()

        self._query = query

        self._source_name = source_name

        # page publication state
        self._page_key = 'initialising page key'
        self._publish_to_page = False

        # gallery walk progress counters
        self._current_page_index = 0
        self._num_new_urls_found = 0
        self._num_urls_found = 0

        self._file_limit = HC.options['gallery_file_limit']

        self._files_paused = start_file_queue_paused
        self._gallery_paused = start_gallery_queue_paused

        # default import options; tag options stay on the client default
        self._file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions(
            'loud')

        self._tag_import_options = ClientImportOptions.TagImportOptions(
            is_default=True)

        # seed the gallery queue with the caller's starting urls
        self._gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog()

        starting_gallery_seeds = list(
            map(ClientImportGallerySeeds.GallerySeed, initial_search_urls))

        self._gallery_seed_log.AddGallerySeeds(starting_gallery_seeds)

        self._file_seed_cache = ClientImportFileSeeds.FileSeedCache()

        # backoff state for when the network tells us to wait
        self._no_work_until = 0
        self._no_work_until_reason = ''

        self._lock = threading.Lock()

        # human-readable status strings for the ui
        self._gallery_status = ''
        self._gallery_status_can_change_timestamp = 0

        self._current_action = ''

        # live network jobs, if any
        self._file_network_job = None
        self._gallery_network_job = None

        # repeating maintenance jobs, registered later
        self._files_repeating_job = None
        self._gallery_repeating_job = None

        # refresh ui when either queue changes
        HG.client_controller.sub(self, 'NotifyFileSeedsUpdated',
                                 'file_seed_cache_file_seeds_updated')
        HG.client_controller.sub(self, 'NotifyGallerySeedsUpdated',
                                 'gallery_seed_log_gallery_seeds_updated')