Example #1
    def _import_and_find_dupes(self):
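
        # one random perceptual hash shared by every file below, so the
        # similar-files search should mark them all as potential duplicates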

        phash = os.urandom(8)

        # fake-import the files with the phash

        (size, mime, width, height, duration, num_frames, has_audio,
         num_words) = (65535, HC.IMAGE_JPEG, 640, 480, None, None, False, None)

        for hash in self._all_hashes:

            fake_file_import_job = ClientImportFileSeeds.FileImportJob(
                'fake path')

            fake_file_import_job._hash = hash
            fake_file_import_job._file_info = (size, mime, width, height,
                                               duration, num_frames, has_audio,
                                               num_words)
            fake_file_import_job._extra_hashes = (b'abcd', b'abcd', b'abcd')
            fake_file_import_job._phashes = [phash]
            fake_file_import_job._file_import_options = ClientImportOptions.FileImportOptions()

            self._write('import_file', fake_file_import_job)

        # run search maintenance

        self._write('maintain_similar_files_tree')
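
        # 0 is presumably the maximum search distance here, i.e. only exact
        # phash matches get paired up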

        self._write('maintain_similar_files_search_for_potential_duplicates',
                    0)
Example #2
    def _do_fake_imports(self):
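
        # build 100 fake files with random hashes and random tags; only the
        # first 50 are actually imported as local files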

        self._md5_to_sha256 = {}
        self._sha256_to_md5 = {}
        self._sha256_to_sha1 = {}

        self._my_files_sha256 = set()

        self._hashes_to_current_tags = {}
        self._hashes_to_pending_tags = {}
        self._hashes_to_deleted_tags = {}
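
        # every fake file below shares the same basic file metadata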

        (size, mime, width, height, duration, num_frames, has_audio,
         num_words) = (65535, HC.IMAGE_JPEG, 640, 480, None, None, False, None)

        for i in range(100):

            hash = HydrusData.GenerateKey()
            md5 = os.urandom(16)
            sha1 = os.urandom(20)
            sha512 = os.urandom(64)

            self._md5_to_sha256[md5] = hash
            self._sha256_to_md5[hash] = md5
            self._sha256_to_sha1[hash] = sha1

            self._hashes_to_current_tags[hash] = set(
                random.sample(current_tag_pool, 3))
            self._hashes_to_pending_tags[hash] = set(
                random.sample(pending_tag_pool, 3))
            self._hashes_to_deleted_tags[hash] = set(
                random.sample(deleted_tag_pool, 3))

            if i < 50:
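
                # only the first fifty get a real import; the rest exist only
                # as hash and tag mappings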

                fake_file_import_job = ClientImportFileSeeds.FileImportJob(
                    'fake path')

                fake_file_import_job._hash = hash
                fake_file_import_job._file_info = (size, mime, width, height,
                                                   duration, num_frames,
                                                   has_audio, num_words)
                fake_file_import_job._extra_hashes = (md5, sha1, sha512)
                fake_file_import_job._phashes = [os.urandom(8)]
                fake_file_import_job._file_import_options = ClientImportOptions.FileImportOptions()

                self.WriteSynchronous('import_file', fake_file_import_job)

                self._my_files_sha256.add(hash)
Example #3
    def MainLoop(self):
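
        # pull hashes off the pending queue and try to fetch each file from a
        # file repository or IPFS service until the thread is told to shut down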

        hashes_still_to_download_in_this_run = set()
        total_hashes_in_this_run = 0
        total_successful_hashes_in_this_run = 0

        while not (HydrusThreading.IsThreadShuttingDown()
                   or self._shutting_down or HG.view_shutdown):

            with self._lock:

                if len(self._pending_hashes) > 0:

                    if total_hashes_in_this_run == 0:
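                        # first work of a fresh run, so set up a progress popup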

                        job_key = ClientThreading.JobKey(cancellable=True)

                        job_key.SetStatusTitle('downloading')

                        job_key.SetVariable('popup_text_1',
                                            'initialising downloader')
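
                        # publish the popup after a short delay; quick runs
                        # cancel it in the finally block before it ever appears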

                        job_key_pub_job = self._controller.CallLater(
                            2.0, self._controller.pub, 'message', job_key)

                    num_before = len(hashes_still_to_download_in_this_run)

                    hashes_still_to_download_in_this_run.update(
                        self._pending_hashes)

                    num_after = len(hashes_still_to_download_in_this_run)

                    total_hashes_in_this_run += num_after - num_before

                    self._pending_hashes = set()

            if len(hashes_still_to_download_in_this_run) == 0:
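                # nothing left to do: reset the counters and wait for new work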

                total_hashes_in_this_run = 0
                total_successful_hashes_in_this_run = 0

                self._new_files_event.wait(5)

                self._new_files_event.clear()

                continue

            if job_key.IsCancelled():

                hashes_still_to_download_in_this_run = set()

                continue

            # pick a random remaining hash to work on next
            hash = random.choice(list(hashes_still_to_download_in_this_run))

            hashes_still_to_download_in_this_run.discard(hash)

            total_done = total_hashes_in_this_run - len(
                hashes_still_to_download_in_this_run)

            job_key.SetVariable(
                'popup_text_1',
                'downloading files from remote services: {}'.format(
                    HydrusData.ConvertValueRangeToPrettyString(
                        total_done, total_hashes_in_this_run)))
            job_key.SetVariable('popup_gauge_1',
                                (total_done, total_hashes_in_this_run))

            try:

                errors_occurred = []
                file_successful = False

                media_result = self._controller.Read('media_result', hash)

                service_keys = list(
                    media_result.GetLocationsManager().GetCurrent())

                random.shuffle(service_keys)

                if CC.COMBINED_LOCAL_FILE_SERVICE_KEY in service_keys:
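                    # the file is already in the local file store, so just count it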

                    total_successful_hashes_in_this_run += 1

                    continue

                for service_key in service_keys:

                    try:

                        service = self._controller.services_manager.GetService(
                            service_key)

                    except:

                        continue

                    try:

                        if service.GetServiceType() == HC.FILE_REPOSITORY:

                            file_repository = service

                            if file_repository.IsFunctional():
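                                # download to a temp file, then run a normal import on it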

                                (os_file_handle,
                                 temp_path) = HydrusPaths.GetTempPath()

                                try:

                                    file_repository.Request(
                                        HC.GET,
                                        'file', {'hash': hash},
                                        temp_path=temp_path)

                                    exclude_deleted = False  # this is the important part here
                                    do_not_check_known_urls_before_importing = False
                                    do_not_check_hashes_before_importing = False
                                    allow_decompression_bombs = True
                                    min_size = None
                                    max_size = None
                                    max_gif_size = None
                                    min_resolution = None
                                    max_resolution = None
                                    automatic_archive = False
                                    associate_source_urls = True

                                    file_import_options = ClientImportOptions.FileImportOptions()

                                    file_import_options.SetPreImportOptions(
                                        exclude_deleted,
                                        do_not_check_known_urls_before_importing,
                                        do_not_check_hashes_before_importing,
                                        allow_decompression_bombs, min_size,
                                        max_size, max_gif_size, min_resolution,
                                        max_resolution)
                                    file_import_options.SetPostImportOptions(
                                        automatic_archive,
                                        associate_source_urls)

                                    file_import_job = ClientImportFileSeeds.FileImportJob(
                                        temp_path, file_import_options)

                                    file_import_job.DoWork()

                                    file_successful = True

                                    break

                                finally:

                                    HydrusPaths.CleanUpTempPath(
                                        os_file_handle, temp_path)

                        elif service.GetServiceType() == HC.IPFS:
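                            # for IPFS, look up the file's multihash and let the service fetch it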

                            multihashes = HG.client_controller.Read(
                                'service_filenames', service_key, {hash})

                            if len(multihashes) > 0:

                                multihash = multihashes[0]

                                service.ImportFile(multihash, silent=True)

                                file_successful = True

                                break

                    except Exception as e:

                        errors_occurred.append(e)

                if file_successful:

                    total_successful_hashes_in_this_run += 1

                if len(errors_occurred) > 0:

                    if not file_successful:

                        raise errors_occurred[0]

            except Exception as e:

                HydrusData.ShowException(e)

                hashes_still_to_download_in_this_run = set()  # abandon the rest of this run

            finally:

                if len(hashes_still_to_download_in_this_run) == 0:
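                    # the run is over (or was abandoned), so tidy up the popup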

                    job_key.DeleteVariable('popup_text_1')
                    job_key.DeleteVariable('popup_gauge_1')

                    if total_successful_hashes_in_this_run > 0:

                        job_key.SetVariable(
                            'popup_text_1',
                            HydrusData.ToHumanInt(
                                total_successful_hashes_in_this_run) +
                            ' files downloaded')

                    job_key_pub_job.Cancel()

                    job_key.Finish()

                    job_key.Delete(1)