def _import_and_find_dupes(self):

    phash = os.urandom(8)

    # fake-import the files with the phash

    (size, mime, width, height, duration, num_frames, has_audio, num_words) = (65535, HC.IMAGE_JPEG, 640, 480, None, None, False, None)

    file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions('loud')

    for hash in self._all_hashes:

        fake_file_import_job = ClientImportFiles.FileImportJob('fake path', file_import_options)

        fake_file_import_job._pre_import_file_status = ClientImportFiles.FileImportStatus(CC.STATUS_UNKNOWN, hash)
        fake_file_import_job._file_info = (size, mime, width, height, duration, num_frames, has_audio, num_words)
        fake_file_import_job._extra_hashes = (b'abcd', b'abcd', b'abcd')
        fake_file_import_job._perceptual_hashes = [phash]
        fake_file_import_job._file_import_options = FileImportOptions.FileImportOptions()

        self._write('import_file', fake_file_import_job)

    # run search maintenance

    self._write('maintain_similar_files_tree')
    self._write('maintain_similar_files_search_for_potential_duplicates', 0)
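
# A standalone sketch (not Hydrus API) of why sharing one perceptual hash makes
# every pair above a potential duplicate: similar-files search compares phashes
# by Hamming distance, and identical phashes sit at distance 0, inside any
# search radius. The helper name below is illustrative.

def _phash_hamming_distance(phash_a: bytes, phash_b: bytes) -> int:
    # count the bits that differ between two equal-length perceptual hashes
    return sum(bin(a ^ b).count('1') for a, b in zip(phash_a, phash_b))

assert _phash_hamming_distance(b'\x00' * 8, b'\x00' * 8) == 0
assert _phash_hamming_distance(b'\x00' * 8, b'\x01' * 8) == 8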
def _do_fake_imports(self):

    self._md5_to_sha256 = {}
    self._sha256_to_md5 = {}
    self._sha256_to_sha1 = {}

    self._my_files_sha256 = set()

    self._hashes_to_current_tags = {}
    self._hashes_to_pending_tags = {}
    self._hashes_to_deleted_tags = {}

    (size, mime, width, height, duration, num_frames, has_audio, num_words) = (65535, HC.IMAGE_JPEG, 640, 480, None, None, False, None)

    file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions('loud')

    for i in range(100):

        hash = HydrusData.GenerateKey()
        md5 = os.urandom(16)
        sha1 = os.urandom(20)
        sha512 = os.urandom(64)

        self._md5_to_sha256[md5] = hash
        self._sha256_to_md5[hash] = md5
        self._sha256_to_sha1[hash] = sha1

        # the tag pools are assumed to be module-level test fixtures
        self._hashes_to_current_tags[hash] = set(random.sample(current_tag_pool, 3))
        self._hashes_to_pending_tags[hash] = set(random.sample(pending_tag_pool, 3))
        self._hashes_to_deleted_tags[hash] = set(random.sample(deleted_tag_pool, 3))

        # only the first half of the hashes are actually imported to 'my files'
        if i < 50:

            fake_file_import_job = ClientImportFiles.FileImportJob('fake path', file_import_options)

            fake_file_import_job._pre_import_file_status = ClientImportFiles.FileImportStatus(CC.STATUS_UNKNOWN, hash)
            fake_file_import_job._file_info = (size, mime, width, height, duration, num_frames, has_audio, num_words)
            fake_file_import_job._extra_hashes = (md5, sha1, sha512)
            fake_file_import_job._perceptual_hashes = [os.urandom(8)]
            fake_file_import_job._file_import_options = FileImportOptions.FileImportOptions()

            self.WriteSynchronous('import_file', fake_file_import_job)

            self._my_files_sha256.add(hash)
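
# Standalone sketch of the invariant the lookup dicts above maintain: the
# md5 -> sha256 and sha256 -> md5 maps are inverses for every fake file, so
# tests can resolve either hash type to the other.
import os

md5_to_sha256 = {}
sha256_to_md5 = {}

for _ in range(5):
    sha256 = os.urandom(32)
    md5 = os.urandom(16)
    md5_to_sha256[md5] = sha256
    sha256_to_md5[sha256] = md5

assert all(sha256_to_md5[s] == m for m, s in md5_to_sha256.items())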
def WriteSynchronous(self, name, *args, **kwargs):

    self._write_call_args[name].append((args, kwargs))

    if name == 'import_file':

        (file_import_job,) = args

        # 'blarg' in sha256 hex
        if file_import_job.GetHash().hex() == 'a593942cb7ea9ffcd8ccf2f0fa23c338e23bfecd9a3e508dfc0bcf07501ead08':

            raise Exception('File failed to import for some reason!')

        else:

            h = file_import_job.GetHash()

            if h is None:

                h = os.urandom(32)

            return ClientImportFiles.FileImportStatus(CC.STATUS_SUCCESSFUL_AND_NEW, h, note='test note')
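
# Per the inline comment above, the hard-coded hex is just sha256(b'blarg'),
# so a test can trigger the failure branch with a predictable fake hash:
import hashlib

assert hashlib.sha256(b'blarg').hexdigest() == 'a593942cb7ea9ffcd8ccf2f0fa23c338e23bfecd9a3e508dfc0bcf07501ead08'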
def MainLoop(self):

    hashes_still_to_download_in_this_run = set()
    total_hashes_in_this_run = 0
    total_successful_hashes_in_this_run = 0

    while not (HydrusThreading.IsThreadShuttingDown() or self._shutting_down or HG.view_shutdown):

        with self._lock:

            if len(self._pending_hashes) > 0:

                if total_hashes_in_this_run == 0:

                    job_key = ClientThreading.JobKey(cancellable=True)

                    job_key.SetStatusTitle('downloading')
                    job_key.SetVariable('popup_text_1', 'initialising downloader')

                    job_key_pub_job = self._controller.CallLater(2.0, self._controller.pub, 'message', job_key)

                num_before = len(hashes_still_to_download_in_this_run)

                hashes_still_to_download_in_this_run.update(self._pending_hashes)

                num_after = len(hashes_still_to_download_in_this_run)

                total_hashes_in_this_run += num_after - num_before

                self._pending_hashes = set()

        if len(hashes_still_to_download_in_this_run) == 0:

            total_hashes_in_this_run = 0
            total_successful_hashes_in_this_run = 0

            self._new_files_event.wait(5)

            self._new_files_event.clear()

            continue

        if job_key.IsCancelled():

            hashes_still_to_download_in_this_run = set()

            continue

        # random.sample on a set is deprecated (removed in Python 3.11), so sample from a list
        hash = random.sample(list(hashes_still_to_download_in_this_run), 1)[0]

        hashes_still_to_download_in_this_run.discard(hash)

        total_done = total_hashes_in_this_run - len(hashes_still_to_download_in_this_run)

        job_key.SetVariable('popup_text_1', 'downloading files from remote services: {}'.format(HydrusData.ConvertValueRangeToPrettyString(total_done, total_hashes_in_this_run)))
        job_key.SetVariable('popup_gauge_1', (total_done, total_hashes_in_this_run))

        try:

            errors_occurred = []
            file_successful = False

            media_result = self._controller.Read('media_result', hash)

            service_keys = list(media_result.GetLocationsManager().GetCurrent())

            random.shuffle(service_keys)

            if CC.COMBINED_LOCAL_FILE_SERVICE_KEY in service_keys:

                total_successful_hashes_in_this_run += 1

                continue

            for service_key in service_keys:

                try:

                    service = self._controller.services_manager.GetService(service_key)

                except:

                    continue

                try:

                    if service.GetServiceType() == HC.FILE_REPOSITORY:

                        file_repository = service

                        if file_repository.IsFunctional():

                            (os_file_handle, temp_path) = HydrusTemp.GetTempPath()

                            try:

                                file_repository.Request(HC.GET, 'file', {'hash': hash}, temp_path=temp_path)

                                exclude_deleted = False  # this is the important part here
                                do_not_check_known_urls_before_importing = False
                                do_not_check_hashes_before_importing = False
                                allow_decompression_bombs = True
                                min_size = None
                                max_size = None
                                max_gif_size = None
                                min_resolution = None
                                max_resolution = None
                                automatic_archive = False
                                associate_primary_urls = True
                                associate_source_urls = True

                                file_import_options = FileImportOptions.FileImportOptions()

                                file_import_options.SetPreImportOptions(exclude_deleted, do_not_check_known_urls_before_importing, do_not_check_hashes_before_importing, allow_decompression_bombs, min_size, max_size, max_gif_size, min_resolution, max_resolution)
                                file_import_options.SetPostImportOptions(automatic_archive, associate_primary_urls, associate_source_urls)

                                file_import_job = ClientImportFiles.FileImportJob(temp_path, file_import_options)

                                file_import_job.DoWork()

                                file_successful = True

                                break

                            finally:

                                HydrusTemp.CleanUpTempPath(os_file_handle, temp_path)

                    elif service.GetServiceType() == HC.IPFS:

                        multihashes = HG.client_controller.Read('service_filenames', service_key, {hash})

                        if len(multihashes) > 0:

                            multihash = multihashes[0]

                            service.ImportFile(multihash, silent=True)

                            file_successful = True

                            break

                except Exception as e:

                    errors_occurred.append(e)

            if file_successful:

                total_successful_hashes_in_this_run += 1

            if len(errors_occurred) > 0:

                if not file_successful:

                    raise errors_occurred[0]

        except Exception as e:

            HydrusData.ShowException(e)

            # abandon the rest of this run; this must stay a set for the len() checks
            hashes_still_to_download_in_this_run = set()

        finally:

            if len(hashes_still_to_download_in_this_run) == 0:

                job_key.DeleteVariable('popup_text_1')
                job_key.DeleteVariable('popup_gauge_1')

                if total_successful_hashes_in_this_run > 0:

                    job_key.SetVariable('popup_text_1', HydrusData.ToHumanInt(total_successful_hashes_in_this_run) + ' files downloaded')

                job_key_pub_job.Cancel()

                job_key.Finish()

                job_key.Delete(1)
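
# Minimal standalone sketch of MainLoop's drain pattern: pull one random item
# per iteration from a working set, report "done/total" progress, and treat an
# emptied set as the end of the run. print() stands in for the job_key popup.
import random

def drain(pending: set) -> None:
    total = len(pending)
    while len(pending) > 0:
        item = random.sample(list(pending), 1)[0]
        pending.discard(item)
        done = total - len(pending)
        print('downloading: {}/{}'.format(done, total))

drain({'a', 'b', 'c'})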
def GetHashIdStatus(self, hash_id, prefix='') -> ClientImportFiles.FileImportStatus:

    if prefix != '':

        prefix += ': '

    hash = self.modules_hashes_local_cache.GetHash(hash_id)

    (is_deleted, timestamp, file_deletion_reason) = self.modules_files_storage.GetDeletionStatus(self.modules_services.combined_local_file_service_id, hash_id)

    if is_deleted:

        if timestamp is None:

            note = 'Deleted from the client before delete times were tracked ({}).'.format(file_deletion_reason)

        else:

            note = 'Deleted from the client {} ({}), which was {} before this check.'.format(HydrusData.ConvertTimestampToPrettyTime(timestamp), file_deletion_reason, HydrusData.BaseTimestampToPrettyTimeDelta(timestamp))

        return ClientImportFiles.FileImportStatus(CC.STATUS_DELETED, hash, note=prefix + note)

    result = self.modules_files_storage.GetCurrentTimestamp(self.modules_services.trash_service_id, hash_id)

    if result is not None:

        timestamp = result

        note = 'Currently in trash ({}). Sent there at {}, which was {} before this check.'.format(file_deletion_reason, HydrusData.ConvertTimestampToPrettyTime(timestamp), HydrusData.BaseTimestampToPrettyTimeDelta(timestamp, just_now_threshold=0))

        return ClientImportFiles.FileImportStatus(CC.STATUS_DELETED, hash, note=prefix + note)

    result = self.modules_files_storage.GetCurrentTimestamp(self.modules_services.combined_local_file_service_id, hash_id)

    if result is not None:

        timestamp = result

        mime = self.modules_files_metadata_basic.GetMime(hash_id)

        note = 'Imported at {}, which was {} before this check.'.format(HydrusData.ConvertTimestampToPrettyTime(timestamp), HydrusData.BaseTimestampToPrettyTimeDelta(timestamp, just_now_threshold=0))

        return ClientImportFiles.FileImportStatus(CC.STATUS_SUCCESSFUL_BUT_REDUNDANT, hash, mime=mime, note=prefix + note)

    return ClientImportFiles.FileImportStatus(CC.STATUS_UNKNOWN, hash)
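
# Standalone sketch of how the status notes above are phrased, with crude
# stand-ins for HydrusData's pretty-time helpers (names here are illustrative).
import time

def _pretty_time(timestamp: int) -> str:
    return time.ctime(timestamp)

def _pretty_time_delta(timestamp: int) -> str:
    return '{} seconds'.format(max(0, int(time.time()) - timestamp))

timestamp = int(time.time()) - 90

note = 'Imported at {}, which was {} before this check.'.format(_pretty_time(timestamp), _pretty_time_delta(timestamp))

print(note)  # e.g. 'Imported at ..., which was 90 seconds before this check.'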