def _import_and_find_dupes(self):
    
    phash = os.urandom(8)
    
    # fake-import the files with the phash
    
    (size, mime, width, height, duration, num_frames, has_audio, num_words) = (65535, HC.IMAGE_JPEG, 640, 480, None, None, False, None)
    
    for hash in self._all_hashes:
        
        fake_file_import_job = ClientImportFileSeeds.FileImportJob('fake path')
        
        fake_file_import_job._hash = hash
        fake_file_import_job._file_info = (size, mime, width, height, duration, num_frames, has_audio, num_words)
        fake_file_import_job._extra_hashes = (b'abcd', b'abcd', b'abcd')
        fake_file_import_job._phashes = [phash]
        fake_file_import_job._file_import_options = ClientImportOptions.FileImportOptions()
        
        self._write('import_file', fake_file_import_job)
        
    # run search maintenance
    
    self._write('maintain_similar_files_tree')
    
    self._write('maintain_similar_files_search_for_potential_duplicates', 0)
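# Illustrative sketch, not part of the test: every fake import above shares one
# 8-byte phash, and the potential-duplicates search pairs files whose 64-bit
# perceptual hashes fall within a Hamming-distance threshold (the 0 passed to the
# maintenance call is presumably that search distance). The distance check itself
# amounts to an XOR-and-popcount, roughly:

def phash_hamming_distance(phash_a: bytes, phash_b: bytes) -> int:
    
    # XOR the two 8-byte hashes and count the differing bits
    xor = int.from_bytes(phash_a, 'big') ^ int.from_bytes(phash_b, 'big')
    
    return bin(xor).count('1')

# Since phash_hamming_distance(phash, phash) == 0, every file in the test should
# match every other at search distance 0.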
def _do_fake_imports(self):
    
    self._md5_to_sha256 = {}
    self._sha256_to_md5 = {}
    self._sha256_to_sha1 = {}
    
    self._my_files_sha256 = set()
    
    self._hashes_to_current_tags = {}
    self._hashes_to_pending_tags = {}
    self._hashes_to_deleted_tags = {}
    
    (size, mime, width, height, duration, num_frames, has_audio, num_words) = (65535, HC.IMAGE_JPEG, 640, 480, None, None, False, None)
    
    for i in range(100):
        
        hash = HydrusData.GenerateKey()
        md5 = os.urandom(16)
        sha1 = os.urandom(20)
        sha512 = os.urandom(64)
        
        self._md5_to_sha256[md5] = hash
        self._sha256_to_md5[hash] = md5
        self._sha256_to_sha1[hash] = sha1
        
        # the tag pools are assumed to be defined at module level by the test harness
        self._hashes_to_current_tags[hash] = set(random.sample(current_tag_pool, 3))
        self._hashes_to_pending_tags[hash] = set(random.sample(pending_tag_pool, 3))
        self._hashes_to_deleted_tags[hash] = set(random.sample(deleted_tag_pool, 3))
        
        # only the first 50 of the 100 generated files are actually imported
        if i < 50:
            
            fake_file_import_job = ClientImportFileSeeds.FileImportJob('fake path')
            
            fake_file_import_job._hash = hash
            fake_file_import_job._file_info = (size, mime, width, height, duration, num_frames, has_audio, num_words)
            fake_file_import_job._extra_hashes = (md5, sha1, sha512)
            fake_file_import_job._phashes = [os.urandom(8)]
            fake_file_import_job._file_import_options = ClientImportOptions.FileImportOptions()
            
            self.WriteSynchronous('import_file', fake_file_import_job)
            
            self._my_files_sha256.add(hash)
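# Illustrative sketch only (a hypothetical test helper, not in the original suite):
# the bookkeeping above supports the kind of cross-hash assertions the client db
# would answer, e.g. resolving an md5 back to its sha256 and checking whether that
# file was one of the 50 actually imported to 'my files'.

def _check_fake_import_state(self, md5):
    
    sha256 = self._md5_to_sha256[md5]
    
    # round-trip: the reverse map must agree
    assert self._sha256_to_md5[sha256] == md5
    
    # distinguishes 'in my files' from merely 'known to the db'
    return sha256 in self._my_files_sha256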
def MainLoop(self):
    
    hashes_still_to_download_in_this_run = set()
    total_hashes_in_this_run = 0
    total_successful_hashes_in_this_run = 0
    
    while not (HydrusThreading.IsThreadShuttingDown() or self._shutting_down or HG.view_shutdown):
        
        # pull any newly queued hashes into this run under the lock
        with self._lock:
            
            if len(self._pending_hashes) > 0:
                
                if total_hashes_in_this_run == 0:
                    
                    job_key = ClientThreading.JobKey(cancellable=True)
                    
                    job_key.SetStatusTitle('downloading')
                    job_key.SetVariable('popup_text_1', 'initialising downloader')
                    
                    job_key_pub_job = self._controller.CallLater(2.0, self._controller.pub, 'message', job_key)
                    
                num_before = len(hashes_still_to_download_in_this_run)
                
                hashes_still_to_download_in_this_run.update(self._pending_hashes)
                
                num_after = len(hashes_still_to_download_in_this_run)
                
                total_hashes_in_this_run += num_after - num_before
                
                self._pending_hashes = set()
                
        # nothing left to do; reset the counters and idle until new work arrives
        if len(hashes_still_to_download_in_this_run) == 0:
            
            total_hashes_in_this_run = 0
            total_successful_hashes_in_this_run = 0
            
            self._new_files_event.wait(5)
            
            self._new_files_event.clear()
            
            continue
            
        if job_key.IsCancelled():
            
            hashes_still_to_download_in_this_run = set()
            
            continue
            
        # random.sample needs a sequence, so convert the set first
        hash = random.sample(list(hashes_still_to_download_in_this_run), 1)[0]
        
        hashes_still_to_download_in_this_run.discard(hash)
        
        total_done = total_hashes_in_this_run - len(hashes_still_to_download_in_this_run)
        
        job_key.SetVariable('popup_text_1', 'downloading files from remote services: {}'.format(HydrusData.ConvertValueRangeToPrettyString(total_done, total_hashes_in_this_run)))
        job_key.SetVariable('popup_gauge_1', (total_done, total_hashes_in_this_run))
        
        try:
            
            errors_occurred = []
            file_successful = False
            
            media_result = self._controller.Read('media_result', hash)
            
            service_keys = list(media_result.GetLocationsManager().GetCurrent())
            
            random.shuffle(service_keys)
            
            # the file is already in the local domain, so there is nothing to download
            if CC.COMBINED_LOCAL_FILE_SERVICE_KEY in service_keys:
                
                total_successful_hashes_in_this_run += 1
                
                continue
                
            for service_key in service_keys:
                
                try:
                    
                    service = self._controller.services_manager.GetService(service_key)
                    
                except:
                    
                    continue
                    
                try:
                    
                    if service.GetServiceType() == HC.FILE_REPOSITORY:
                        
                        file_repository = service
                        
                        if file_repository.IsFunctional():
                            
                            (os_file_handle, temp_path) = HydrusPaths.GetTempPath()
                            
                            try:
                                
                                file_repository.Request(HC.GET, 'file', {'hash': hash}, temp_path=temp_path)
                                
                                exclude_deleted = False # this is the important part here
                                do_not_check_known_urls_before_importing = False
                                do_not_check_hashes_before_importing = False
                                allow_decompression_bombs = True
                                min_size = None
                                max_size = None
                                max_gif_size = None
                                min_resolution = None
                                max_resolution = None
                                
                                automatic_archive = False
                                associate_source_urls = True
                                
                                file_import_options = ClientImportOptions.FileImportOptions()
                                
                                file_import_options.SetPreImportOptions(exclude_deleted, do_not_check_known_urls_before_importing, do_not_check_hashes_before_importing, allow_decompression_bombs, min_size, max_size, max_gif_size, min_resolution, max_resolution)
                                file_import_options.SetPostImportOptions(automatic_archive, associate_source_urls)
                                
                                file_import_job = ClientImportFileSeeds.FileImportJob(temp_path, file_import_options)
                                
                                file_import_job.DoWork()
                                
                                file_successful = True
                                
                                break
                                
                            finally:
                                
                                HydrusPaths.CleanUpTempPath(os_file_handle, temp_path)
                                
                    elif service.GetServiceType() == HC.IPFS:
                        
                        multihashes = HG.client_controller.Read('service_filenames', service_key, {hash})
                        
                        if len(multihashes) > 0:
                            
                            multihash = multihashes[0]
                            
                            service.ImportFile(multihash, silent=True)
                            
                            file_successful = True
                            
                            break
                            
                except Exception as e:
                    
                    errors_occurred.append(e)
                    
            if file_successful:
                
                total_successful_hashes_in_this_run += 1
                
            if len(errors_occurred) > 0:
                
                if not file_successful:
                    
                    raise errors_occurred[0]
                    
        except Exception as e:
            
            HydrusData.ShowException(e)
            
            hashes_still_to_download_in_this_run = set()
            
        finally:
            
            # the run is finished; close out the popup
            if len(hashes_still_to_download_in_this_run) == 0:
                
                job_key.DeleteVariable('popup_text_1')
                job_key.DeleteVariable('popup_gauge_1')
                
                if total_successful_hashes_in_this_run > 0:
                    
                    job_key.SetVariable('popup_text_1', HydrusData.ToHumanInt(total_successful_hashes_in_this_run) + ' files downloaded')
                    
                job_key_pub_job.Cancel()
                
                job_key.Finish()
                
                job_key.Delete(1)
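# Illustrative sketch of the producer side of this loop, under the assumption that
# callers queue sha256 hashes through a method like the one below (the name is an
# assumption, not necessarily the real entry point): work goes into
# self._pending_hashes under the same lock, and the event wakes the worker out of
# its five-second idle wait.

def DownloadFiles(self, hashes):
    
    with self._lock:
        
        self._pending_hashes.update(hashes)
        
    self._new_files_event.set()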