Esempio n. 1
0
    def _import_and_find_dupes(self):
        """Fake-import every hash in ``self._all_hashes`` and run similar-files maintenance.

        All files are written with the same random 8-byte phash and the same
        placeholder metadata (presumably so the duplicate search pairs them
        with each other -- confirm against the callers' assertions). After the
        imports, the similar-files tree maintenance and the potential-duplicate
        search (with argument 0) are written through ``self._write``.
        """
        # One phash shared by every fake file.
        shared_phash = os.urandom(8)

        # Placeholder file info, in the order:
        # (size, mime, width, height, duration, num_frames, has_audio, num_words)
        file_info = (65535, HC.IMAGE_JPEG, 640, 480, None, None, False, None)

        for file_hash in self._all_hashes:
            # The job is never run; its private fields are filled in by hand
            # and it is handed straight to the 'import_file' write.
            job = ClientImportFileSeeds.FileImportJob('fake path')

            job._hash = file_hash
            job._file_info = file_info
            job._extra_hashes = (b'abcd', b'abcd', b'abcd')
            job._phashes = [shared_phash]
            job._file_import_options = ClientImportOptions.FileImportOptions()

            self._write('import_file', job)

        # Run search maintenance.
        self._write('maintain_similar_files_tree')
        self._write('maintain_similar_files_search_for_potential_duplicates', 0)
Esempio n. 2
0
 def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
     
     if version == 1:
         
         serialisable_watchers = old_serialisable_info
         
         try:
             
             checker_options = HG.client_controller.new_options.GetDefaultWatcherCheckerOptions()
             file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions( 'loud' )
             tag_import_options = ClientImportOptions.TagImportOptions( is_default = True )
             
         except:
             
             checker_options = ClientImportOptions.CheckerOptions()
             file_import_options = ClientImportOptions.FileImportOptions()
             tag_import_options = ClientImportOptions.TagImportOptions()
             
         
         serialisable_checker_options = checker_options.GetSerialisableTuple()
         serialisable_file_import_options = file_import_options.GetSerialisableTuple()
         serialisable_tag_import_options = tag_import_options.GetSerialisableTuple()
         
         highlighted_watcher_key = None
         
         serialisable_highlighted_watcher_key = highlighted_watcher_key
         
         new_serialisable_info = ( serialisable_watchers, serialisable_highlighted_watcher_key, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options )
         
         return ( 2, new_serialisable_info )
Esempio n. 3
0
    def GetNewHashes(self):
        """Return the hashes the file seed cache presents for a (True, False, False) presentation setting.

        A default ``FileImportOptions`` is built, its presentation options are
        set to ``(True, False, False)`` (presumably "new files only", going by
        this method's name -- confirm against ``SetPresentationOptions``), and
        the result of ``GetPresentedHashes`` is returned. Everything runs while
        holding ``self._lock``.
        """
        with self._lock:

            presentation_options = ClientImportOptions.FileImportOptions()
            presentation_options.SetPresentationOptions(True, False, False)

            presented_hashes = self._file_seed_cache.GetPresentedHashes(
                presentation_options)

            return presented_hashes
Esempio n. 4
0
 def test_SERIALISABLE_TYPE_SUBSCRIPTION( self ):
     """Round-trip a Subscription through dump/load, first freshly constructed and then fully configured.
     
     Also checks the GUG/tag-import-options/queries getters against what was
     set, and that PauseResume() toggles the paused flag.
     """
     
     def assert_subscriptions_match( original, duplicate ):
         
         # Comparison callback handed to self._dump_and_load_and_test.
         self.assertEqual( original.GetName(), duplicate.GetName() )
         
         self.assertEqual( original._gug_key_and_name, duplicate._gug_key_and_name )
         
         # Only the number of queries is compared, not the queries themselves.
         self.assertEqual( len( original._queries ), len( duplicate._queries ) )
         
         for attribute_name in ( '_initial_file_limit', '_periodic_file_limit', '_paused' ):
             
             self.assertEqual( getattr( original, attribute_name ), getattr( duplicate, attribute_name ) )
             
         
         # Options objects are compared through their serialisable tuples.
         for attribute_name in ( '_file_import_options', '_tag_import_options' ):
             
             original_options = getattr( original, attribute_name )
             duplicate_options = getattr( duplicate, attribute_name )
             
             self.assertEqual( original_options.GetSerialisableTuple(), duplicate_options.GetSerialisableTuple() )
             
         
         self.assertEqual( original._no_work_until, duplicate._no_work_until )
         
     
     # First pass: a subscription with nothing but a name.
     sub = ClientImportSubscriptions.Subscription( 'test sub' )
     
     self._dump_and_load_and_test( sub, assert_subscriptions_match )
     
     # Second pass: populate every field and round-trip again.
     gug_key_and_name = ( HydrusData.GenerateKey(), 'muh test gug' )
     
     queries = [
         ClientImportSubscriptionQuery.SubscriptionQuery( 'test query' ),
         ClientImportSubscriptionQuery.SubscriptionQuery( 'test query 2' )
     ]
     
     checker_options = ClientImportOptions.CheckerOptions()
     
     initial_file_limit = 100
     periodic_file_limit = 50
     paused = False
     
     file_import_options = ClientImportOptions.FileImportOptions()
     
     service_tag_import_options = ClientImportOptions.ServiceTagImportOptions( get_tags = False, additional_tags = { 'test additional tag', 'and another' } )
     
     service_keys_to_service_tag_import_options = { HydrusData.GenerateKey() : service_tag_import_options }
     
     tag_import_options = ClientImportOptions.TagImportOptions( service_keys_to_service_tag_import_options = service_keys_to_service_tag_import_options )
     
     # 86400 * 20 seconds, i.e. twenty days before now.
     no_work_until = HydrusData.GetNow() - 86400 * 20
     
     sub.SetTuple( gug_key_and_name, checker_options, initial_file_limit, periodic_file_limit, paused, file_import_options, tag_import_options, no_work_until )
     
     sub.SetQueries( queries )
     
     self.assertEqual( sub.GetGUGKeyAndName(), gug_key_and_name )
     self.assertEqual( sub.GetTagImportOptions(), tag_import_options )
     self.assertEqual( sub.GetQueries(), queries )
     
     # PauseResume() should flip the flag on and then back off.
     self.assertEqual( sub._paused, False )
     
     for expected_paused in ( True, False ):
         
         sub.PauseResume()
         
         self.assertEqual( sub._paused, expected_paused )
         
     
     self._dump_and_load_and_test( sub, assert_subscriptions_match )
Esempio n. 5
0
    def _do_fake_imports(self):
        """Generate 100 fake files with hashes and tags, and fake-import the first 50.

        For every generated file the md5<->sha256 and sha256->sha1 lookups and
        three sampled tags from each of the current/pending/deleted tag pools
        are recorded on ``self``. The first 50 files are also written through
        ``self.WriteSynchronous('import_file', ...)`` and their sha256 hashes
        are added to ``self._my_files_sha256``.
        """
        # Reset all bookkeeping so repeated calls start clean.
        self._md5_to_sha256 = {}
        self._sha256_to_md5 = {}
        self._sha256_to_sha1 = {}

        self._my_files_sha256 = set()

        self._hashes_to_current_tags = {}
        self._hashes_to_pending_tags = {}
        self._hashes_to_deleted_tags = {}

        # Placeholder file info shared by every fake import, in the order:
        # (size, mime, width, height, duration, num_frames, has_audio, num_words)
        file_info = (65535, HC.IMAGE_JPEG, 640, 480, None, None, False, None)

        for index in range(100):

            sha256 = HydrusData.GenerateKey()
            md5 = os.urandom(16)
            sha1 = os.urandom(20)
            sha512 = os.urandom(64)

            self._md5_to_sha256[md5] = sha256
            self._sha256_to_md5[sha256] = md5
            self._sha256_to_sha1[sha256] = sha1

            self._hashes_to_current_tags[sha256] = set(
                random.sample(current_tag_pool, 3))
            self._hashes_to_pending_tags[sha256] = set(
                random.sample(pending_tag_pool, 3))
            self._hashes_to_deleted_tags[sha256] = set(
                random.sample(deleted_tag_pool, 3))

            # Only the first half of the files is actually imported.
            if index >= 50:
                continue

            job = ClientImportFileSeeds.FileImportJob('fake path')

            job._hash = sha256
            job._file_info = file_info
            job._extra_hashes = (md5, sha1, sha512)
            job._phashes = [os.urandom(8)]
            job._file_import_options = ClientImportOptions.FileImportOptions()

            self.WriteSynchronous('import_file', job)

            self._my_files_sha256.add(sha256)
Esempio n. 6
0
 def _ShowFilesInNewPage( self, show = 'all' ):
     
     file_seed_cache = self._file_seed_cache_get_callable()
     
     if show == 'all':
         
         hashes = file_seed_cache.GetHashes()
         
     elif show == 'new':
         
         file_import_options = ClientImportOptions.FileImportOptions()
         
         file_import_options.SetPresentationOptions( True, False, False )
         
         hashes = file_seed_cache.GetPresentedHashes( file_import_options )
         
     
     if len( hashes ) > 0:
         
         HG.client_controller.pub( 'new_page_query', CC.LOCAL_FILE_SERVICE_KEY, initial_hashes = hashes )
Esempio n. 7
0
    def MainLoop(self):
        """Download pending files from remote services until shutdown is signalled.

        Each pass moves ``self._pending_hashes`` into the current run (under
        ``self._lock``), picks one remaining hash at random and tries every
        service that currently holds the file:

        * file repositories: the file is requested to a temp path and imported
          with ``exclude_deleted = False``;
        * IPFS services: the file is imported by its multihash.

        Progress is reported through a cancellable job key created at the start
        of each run. When a run's last hash has been handled, the job key is
        finished and deleted. If nothing is queued, the loop waits up to five
        seconds on ``self._new_files_event``.

        Any exception while handling a hash is shown via
        ``HydrusData.ShowException`` and abandons the rest of the run.
        """
        hashes_still_to_download_in_this_run = set()
        total_hashes_in_this_run = 0
        total_successful_hashes_in_this_run = 0

        while not (HydrusThreading.IsThreadShuttingDown()
                   or self._shutting_down or HG.view_shutdown):

            with self._lock:

                if len(self._pending_hashes) > 0:

                    if total_hashes_in_this_run == 0:

                        # First hashes of a fresh run: create the progress
                        # popup, published after two seconds unless the run
                        # finishes first and cancels the publish job.
                        job_key = ClientThreading.JobKey(cancellable=True)

                        job_key.SetStatusTitle('downloading')

                        job_key.SetVariable('popup_text_1',
                                            'initialising downloader')

                        job_key_pub_job = self._controller.CallLater(
                            2.0, self._controller.pub, 'message', job_key)

                    # Count only hashes that were not already queued.
                    num_before = len(hashes_still_to_download_in_this_run)

                    hashes_still_to_download_in_this_run.update(
                        self._pending_hashes)

                    num_after = len(hashes_still_to_download_in_this_run)

                    total_hashes_in_this_run += num_after - num_before

                    self._pending_hashes = set()

            if len(hashes_still_to_download_in_this_run) == 0:

                # Idle: reset the run counters and wait for new work.
                total_hashes_in_this_run = 0
                total_successful_hashes_in_this_run = 0

                self._new_files_event.wait(5)

                self._new_files_event.clear()

                continue

            if job_key.IsCancelled():

                # NOTE(review): the run counters are only reset on the idle
                # path above, so hashes queued before the next idle pass are
                # merged into this cancelled run and dropped here again.
                # Confirm this is intended.
                hashes_still_to_download_in_this_run = set()

                continue

            # random.sample() no longer accepts a set (TypeError on 3.11+), so
            # pick from a tuple snapshot. The set is non-empty at this point.
            file_hash = random.choice(
                tuple(hashes_still_to_download_in_this_run))

            hashes_still_to_download_in_this_run.discard(file_hash)

            total_done = total_hashes_in_this_run - len(
                hashes_still_to_download_in_this_run)

            job_key.SetVariable(
                'popup_text_1',
                'downloading files from remote services: {}'.format(
                    HydrusData.ConvertValueRangeToPrettyString(
                        total_done, total_hashes_in_this_run)))
            job_key.SetVariable('popup_gauge_1',
                                (total_done, total_hashes_in_this_run))

            try:

                errors_occurred = []
                file_successful = False

                media_result = self._controller.Read('media_result',
                                                     file_hash)

                service_keys = list(
                    media_result.GetLocationsManager().GetCurrent())

                # Try the candidate services in random order.
                random.shuffle(service_keys)

                if CC.COMBINED_LOCAL_FILE_SERVICE_KEY in service_keys:

                    # Already local: count it as done. The finally clause
                    # below still runs on this continue.
                    total_successful_hashes_in_this_run += 1

                    continue

                for service_key in service_keys:

                    try:

                        service = self._controller.services_manager.GetService(
                            service_key)

                    except Exception:

                        # The service could not be fetched; try the next one.
                        continue

                    try:

                        if service.GetServiceType() == HC.FILE_REPOSITORY:

                            file_repository = service

                            if file_repository.IsFunctional():

                                (os_file_handle,
                                 temp_path) = HydrusPaths.GetTempPath()

                                try:

                                    file_repository.Request(
                                        HC.GET,
                                        'file', {'hash': file_hash},
                                        temp_path=temp_path)

                                    exclude_deleted = False  # this is the important part here
                                    do_not_check_known_urls_before_importing = False
                                    do_not_check_hashes_before_importing = False
                                    allow_decompression_bombs = True
                                    min_size = None
                                    max_size = None
                                    max_gif_size = None
                                    min_resolution = None
                                    max_resolution = None
                                    automatic_archive = False
                                    associate_source_urls = True

                                    file_import_options = ClientImportOptions.FileImportOptions(
                                    )

                                    file_import_options.SetPreImportOptions(
                                        exclude_deleted,
                                        do_not_check_known_urls_before_importing,
                                        do_not_check_hashes_before_importing,
                                        allow_decompression_bombs, min_size,
                                        max_size, max_gif_size, min_resolution,
                                        max_resolution)
                                    file_import_options.SetPostImportOptions(
                                        automatic_archive,
                                        associate_source_urls)

                                    file_import_job = ClientImportFileSeeds.FileImportJob(
                                        temp_path, file_import_options)

                                    file_import_job.DoWork()

                                    file_successful = True

                                    break

                                finally:

                                    # Always release the temp file, including
                                    # on the break above and on errors.
                                    HydrusPaths.CleanUpTempPath(
                                        os_file_handle, temp_path)

                        elif service.GetServiceType() == HC.IPFS:

                            multihashes = HG.client_controller.Read(
                                'service_filenames', service_key,
                                {file_hash})

                            if len(multihashes) > 0:

                                multihash = multihashes[0]

                                service.ImportFile(multihash, silent=True)

                                file_successful = True

                                break

                    except Exception as e:

                        # Remember the failure and move on to the next service.
                        errors_occurred.append(e)

                if file_successful:

                    total_successful_hashes_in_this_run += 1

                if len(errors_occurred) > 0:

                    # Only surface an error when no service succeeded.
                    if not file_successful:

                        raise errors_occurred[0]

            except Exception as e:

                HydrusData.ShowException(e)

                # Abandon the rest of this run. This must remain a set: the
                # finally clause below calls len() on it, and assigning the
                # int 0 here raised TypeError and killed the loop.
                hashes_still_to_download_in_this_run = set()

            finally:

                if len(hashes_still_to_download_in_this_run) == 0:

                    # Run finished (or abandoned): replace the progress
                    # display with a summary and retire the job key.
                    job_key.DeleteVariable('popup_text_1')
                    job_key.DeleteVariable('popup_gauge_1')

                    if total_successful_hashes_in_this_run > 0:

                        job_key.SetVariable(
                            'popup_text_1',
                            HydrusData.ToHumanInt(
                                total_successful_hashes_in_this_run) +
                            ' files downloaded')

                    job_key_pub_job.Cancel()

                    job_key.Finish()

                    job_key.Delete(1)