def _import_and_find_dupes(self):
    """Fake-import every test hash with one shared phash, then run the
    similar-files maintenance so the db flags them all as potential dupes."""
    
    shared_phash = os.urandom(8)
    
    # minimal plausible metadata: (size, mime, width, height, duration, num_frames, has_audio, num_words)
    file_info = (65535, HC.IMAGE_JPEG, 640, 480, None, None, False, None)
    
    # fake-import the files with the phash
    for file_hash in self._all_hashes:
        import_job = ClientImportFileSeeds.FileImportJob('fake path')
        
        import_job._hash = file_hash
        import_job._file_info = file_info
        import_job._extra_hashes = (b'abcd', b'abcd', b'abcd')
        import_job._phashes = [shared_phash]
        import_job._file_import_options = ClientImportOptions.FileImportOptions()
        
        self._write('import_file', import_job)
    
    # run search maintenance
    self._write('maintain_similar_files_tree')
    self._write('maintain_similar_files_search_for_potential_duplicates', 0)
def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
    """Migrate serialisable info from version 1 to version 2.
    
    Version 2 appends default checker/file-import/tag-import options and an
    (empty) highlighted-watcher key alongside the existing watcher list.
    Returns ( new_version, new_serialisable_info ).
    """
    
    if version == 1:
        
        serialisable_watchers = old_serialisable_info
        
        try:
            
            # prefer the user's configured defaults when a controller is available
            checker_options = HG.client_controller.new_options.GetDefaultWatcherCheckerOptions()
            file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions( 'loud' )
            tag_import_options = ClientImportOptions.TagImportOptions( is_default = True )
            
        except Exception:
            
            # no live controller (e.g. offline deserialisation) -- fall back to
            # vanilla defaults. Was a bare 'except:', which also swallowed
            # SystemExit/KeyboardInterrupt.
            checker_options = ClientImportOptions.CheckerOptions()
            file_import_options = ClientImportOptions.FileImportOptions()
            tag_import_options = ClientImportOptions.TagImportOptions()
            
        serialisable_checker_options = checker_options.GetSerialisableTuple()
        serialisable_file_import_options = file_import_options.GetSerialisableTuple()
        serialisable_tag_import_options = tag_import_options.GetSerialisableTuple()
        
        # no watcher highlighted yet in a freshly-migrated page
        highlighted_watcher_key = None
        
        serialisable_highlighted_watcher_key = highlighted_watcher_key
        
        new_serialisable_info = ( serialisable_watchers, serialisable_highlighted_watcher_key, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options )
        
        return ( 2, new_serialisable_info )
def GetNewHashes(self):
    """Return the presented hashes from the file seed cache, filtered by
    presentation options set to (True, False, False) -- presumably 'new
    files only'; confirm against SetPresentationOptions."""
    
    with self._lock:
        
        presentation_options = ClientImportOptions.FileImportOptions()
        presentation_options.SetPresentationOptions(True, False, False)
        
        return self._file_seed_cache.GetPresentedHashes(presentation_options)
def test_SERIALISABLE_TYPE_SUBSCRIPTION( self ):
    """Round-trip a Subscription through serialisation, first fresh and then
    fully populated, and exercise the PauseResume toggle."""
    
    def assert_subs_equal( obj, dupe_obj ):
        
        self.assertEqual( obj.GetName(), dupe_obj.GetName() )
        
        # simple attributes must survive a dump/load round trip unchanged
        for attr in ( '_gug_key_and_name', '_initial_file_limit', '_periodic_file_limit', '_paused', '_no_work_until' ):
            
            self.assertEqual( getattr( obj, attr ), getattr( dupe_obj, attr ) )
            
        self.assertEqual( len( obj._queries ), len( dupe_obj._queries ) )
        
        # options objects are compared via their serialisable tuples
        for attr in ( '_file_import_options', '_tag_import_options' ):
            
            self.assertEqual( getattr( obj, attr ).GetSerialisableTuple(), getattr( dupe_obj, attr ).GetSerialisableTuple() )
            
    # a default-constructed subscription should round-trip cleanly
    sub = ClientImportSubscriptions.Subscription( 'test sub' )
    
    self._dump_and_load_and_test( sub, assert_subs_equal )
    
    # now populate everything and round-trip again
    gug_key_and_name = ( HydrusData.GenerateKey(), 'muh test gug' )
    queries = [ ClientImportSubscriptionQuery.SubscriptionQuery( 'test query' ), ClientImportSubscriptionQuery.SubscriptionQuery( 'test query 2' ) ]
    checker_options = ClientImportOptions.CheckerOptions()
    initial_file_limit = 100
    periodic_file_limit = 50
    paused = False
    
    file_import_options = ClientImportOptions.FileImportOptions()
    
    service_tag_import_options = ClientImportOptions.ServiceTagImportOptions( get_tags = False, additional_tags = { 'test additional tag', 'and another' } )
    
    tag_import_options = ClientImportOptions.TagImportOptions( service_keys_to_service_tag_import_options = { HydrusData.GenerateKey() : service_tag_import_options } )
    
    no_work_until = HydrusData.GetNow() - 86400 * 20
    
    sub.SetTuple( gug_key_and_name, checker_options, initial_file_limit, periodic_file_limit, paused, file_import_options, tag_import_options, no_work_until )
    
    sub.SetQueries( queries )
    
    self.assertEqual( sub.GetGUGKeyAndName(), gug_key_and_name )
    self.assertEqual( sub.GetTagImportOptions(), tag_import_options )
    self.assertEqual( sub.GetQueries(), queries )
    
    # PauseResume flips the paused flag each call
    self.assertEqual( sub._paused, False )
    
    sub.PauseResume()
    
    self.assertEqual( sub._paused, True )
    
    sub.PauseResume()
    
    self.assertEqual( sub._paused, False )
    
    self._dump_and_load_and_test( sub, assert_subs_equal )
def _do_fake_imports(self):
    """Populate the test fixture: generate 100 random hashes with extra-hash
    mappings and random tag sets, and fake-import the first 50 into the db."""
    
    self._md5_to_sha256 = {}
    self._sha256_to_md5 = {}
    self._sha256_to_sha1 = {}
    
    self._my_files_sha256 = set()
    
    self._hashes_to_current_tags = {}
    self._hashes_to_pending_tags = {}
    self._hashes_to_deleted_tags = {}
    
    # minimal plausible metadata: (size, mime, width, height, duration, num_frames, has_audio, num_words)
    file_info = (65535, HC.IMAGE_JPEG, 640, 480, None, None, False, None)
    
    for i in range(100):
        
        sha256 = HydrusData.GenerateKey()
        md5 = os.urandom(16)
        sha1 = os.urandom(20)
        sha512 = os.urandom(64)
        
        self._md5_to_sha256[md5] = sha256
        self._sha256_to_md5[sha256] = md5
        self._sha256_to_sha1[sha256] = sha1
        
        # three random tags per status from the module-level pools
        self._hashes_to_current_tags[sha256] = set(random.sample(current_tag_pool, 3))
        self._hashes_to_pending_tags[sha256] = set(random.sample(pending_tag_pool, 3))
        self._hashes_to_deleted_tags[sha256] = set(random.sample(deleted_tag_pool, 3))
        
        # only the first half actually get imported into 'my files'
        if i < 50:
            
            import_job = ClientImportFileSeeds.FileImportJob('fake path')
            
            import_job._hash = sha256
            import_job._file_info = file_info
            import_job._extra_hashes = (md5, sha1, sha512)
            import_job._phashes = [os.urandom(8)]
            import_job._file_import_options = ClientImportOptions.FileImportOptions()
            
            self.WriteSynchronous('import_file', import_job)
            
            self._my_files_sha256.add(sha256)
def _ShowFilesInNewPage( self, show = 'all' ):
    """Open a new local-files page showing hashes from this file seed cache.
    
    show: 'all' for every hash in the cache, 'new' for only the hashes the
    presentation options would present. No page is opened when the result
    is empty.
    """
    
    file_seed_cache = self._file_seed_cache_get_callable()
    
    if show == 'all':
        
        hashes = file_seed_cache.GetHashes()
        
    elif show == 'new':
        
        file_import_options = ClientImportOptions.FileImportOptions()
        file_import_options.SetPresentationOptions( True, False, False )
        
        hashes = file_seed_cache.GetPresentedHashes( file_import_options )
        
    else:
        
        # previously an unrecognised 'show' left 'hashes' unbound and raised
        # NameError below; treat it explicitly as 'nothing to show'
        hashes = []
        
    if len( hashes ) > 0:
        
        HG.client_controller.pub( 'new_page_query', CC.LOCAL_FILE_SERVICE_KEY, initial_hashes = hashes )
def MainLoop(self):
    """Background worker loop: drain self._pending_hashes and try to download
    each file from any remote service (file repository or IPFS) that
    currently has it, reporting progress via a popup job key.
    
    Fixes over the previous version:
    - the outer exception handler assigned 0 (an int) to the working set,
      which made len() in the finally block raise TypeError; it now assigns
      an empty set
    - random.sample() is given a list, since set support was removed in
      Python 3.11
    """
    
    hashes_still_to_download_in_this_run = set()
    total_hashes_in_this_run = 0
    total_successful_hashes_in_this_run = 0
    
    while not (HydrusThreading.IsThreadShuttingDown() or self._shutting_down or HG.view_shutdown):
        
        with self._lock:
            
            if len(self._pending_hashes) > 0:
                
                if total_hashes_in_this_run == 0:
                    
                    # first work of a new run -- set up the progress popup
                    job_key = ClientThreading.JobKey(cancellable=True)
                    job_key.SetStatusTitle('downloading')
                    job_key.SetVariable('popup_text_1', 'initialising downloader')
                    
                    # delay publishing the popup so very quick runs never show one
                    job_key_pub_job = self._controller.CallLater(
                        2.0, self._controller.pub, 'message', job_key)
                    
                num_before = len(hashes_still_to_download_in_this_run)
                
                hashes_still_to_download_in_this_run.update(self._pending_hashes)
                
                num_after = len(hashes_still_to_download_in_this_run)
                
                total_hashes_in_this_run += num_after - num_before
                
                self._pending_hashes = set()
                
        if len(hashes_still_to_download_in_this_run) == 0:
            
            # nothing to do -- reset run totals and sleep until new files arrive
            total_hashes_in_this_run = 0
            total_successful_hashes_in_this_run = 0
            
            self._new_files_event.wait(5)
            self._new_files_event.clear()
            
            continue
            
        if job_key.IsCancelled():
            
            hashes_still_to_download_in_this_run = set()
            
            continue
            
        # pick an arbitrary remaining hash; random.sample needs a sequence
        # on Python >= 3.11, so convert the set first
        hash = random.sample(list(hashes_still_to_download_in_this_run), 1)[0]
        
        hashes_still_to_download_in_this_run.discard(hash)
        
        total_done = total_hashes_in_this_run - len(
            hashes_still_to_download_in_this_run)
        
        job_key.SetVariable(
            'popup_text_1',
            'downloading files from remote services: {}'.format(
                HydrusData.ConvertValueRangeToPrettyString(
                    total_done, total_hashes_in_this_run)))
        job_key.SetVariable('popup_gauge_1',
                            (total_done, total_hashes_in_this_run))
        
        try:
            
            errors_occured = []
            file_successful = False
            
            media_result = self._controller.Read('media_result', hash)
            
            service_keys = list(
                media_result.GetLocationsManager().GetCurrent())
            
            random.shuffle(service_keys)
            
            if CC.COMBINED_LOCAL_FILE_SERVICE_KEY in service_keys:
                
                # we already have this file locally -- nothing to download
                total_successful_hashes_in_this_run += 1
                
                continue
                
            for service_key in service_keys:
                
                try:
                    
                    service = self._controller.services_manager.GetService(
                        service_key)
                    
                except Exception:
                    
                    # service no longer exists -- try the next one
                    continue
                    
                try:
                    
                    if service.GetServiceType() == HC.FILE_REPOSITORY:
                        
                        file_repository = service
                        
                        if file_repository.IsFunctional():
                            
                            (os_file_handle, temp_path) = HydrusPaths.GetTempPath()
                            
                            try:
                                
                                file_repository.Request(
                                    HC.GET, 'file', {'hash': hash},
                                    temp_path=temp_path)
                                
                                exclude_deleted = False  # this is the important part here
                                do_not_check_known_urls_before_importing = False
                                do_not_check_hashes_before_importing = False
                                allow_decompression_bombs = True
                                min_size = None
                                max_size = None
                                max_gif_size = None
                                min_resolution = None
                                max_resolution = None
                                automatic_archive = False
                                associate_source_urls = True
                                
                                file_import_options = ClientImportOptions.FileImportOptions()
                                
                                file_import_options.SetPreImportOptions(
                                    exclude_deleted,
                                    do_not_check_known_urls_before_importing,
                                    do_not_check_hashes_before_importing,
                                    allow_decompression_bombs, min_size,
                                    max_size, max_gif_size, min_resolution,
                                    max_resolution)
                                file_import_options.SetPostImportOptions(
                                    automatic_archive, associate_source_urls)
                                
                                file_import_job = ClientImportFileSeeds.FileImportJob(
                                    temp_path, file_import_options)
                                
                                file_import_job.DoWork()
                                
                                file_successful = True
                                
                                break
                                
                            finally:
                                
                                HydrusPaths.CleanUpTempPath(
                                    os_file_handle, temp_path)
                                
                    elif service.GetServiceType() == HC.IPFS:
                        
                        multihashes = HG.client_controller.Read(
                            'service_filenames', service_key, {hash})
                        
                        if len(multihashes) > 0:
                            
                            multihash = multihashes[0]
                            
                            service.ImportFile(multihash, silent=True)
                            
                            file_successful = True
                            
                            break
                            
                except Exception as e:
                    
                    # remember the error but keep trying other services
                    errors_occured.append(e)
                    
            if file_successful:
                
                total_successful_hashes_in_this_run += 1
                
            if len(errors_occured) > 0:
                
                if not file_successful:
                    
                    raise errors_occured[0]
                    
        except Exception as e:
            
            HydrusData.ShowException(e)
            
            # abandon the rest of this run; was previously assigned 0, which
            # made the len() call in the finally block raise TypeError
            hashes_still_to_download_in_this_run = set()
            
        finally:
            
            if len(hashes_still_to_download_in_this_run) == 0:
                
                # run complete -- tidy up the popup
                job_key.DeleteVariable('popup_text_1')
                job_key.DeleteVariable('popup_gauge_1')
                
                if total_successful_hashes_in_this_run > 0:
                    
                    job_key.SetVariable(
                        'popup_text_1',
                        HydrusData.ToHumanInt(
                            total_successful_hashes_in_this_run) +
                        ' files downloaded')
                    
                job_key_pub_job.Cancel()
                
                job_key.Finish()
                job_key.Delete(1)