def DisassociatePHashes(self, hash_id, phash_ids):

        # break the mapping between this file and the given perceptual hashes
        self._c.executemany(
            'DELETE FROM shape_perceptual_hash_map WHERE phash_id = ? AND hash_id = ?;',
            ((phash_id, hash_id) for phash_id in phash_ids))

        # any phash still mapped to another file remains useful
        useful_phash_ids = {
            phash_id
            for (phash_id, ) in self._c.execute(
                'SELECT phash_id FROM shape_perceptual_hash_map WHERE phash_id IN '
                + HydrusData.SplayListForDB(phash_ids) + ';')
        }

        useless_phash_ids = phash_ids.difference(useful_phash_ids)

        # orphaned phashes are queued for search-tree branch regeneration
        self._c.executemany(
            'INSERT OR IGNORE INTO shape_maintenance_branch_regen ( phash_id ) VALUES ( ? );',
            ((phash_id, ) for phash_id in useless_phash_ids))
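The survivor check above, and several queries below, inline id collections into the SQL text with HydrusData.SplayListForDB, which is not part of this excerpt. A minimal sketch of what it presumably does, assuming it only ever receives trusted integer ids generated by the database:

def SplayListForDB(ids):

    # render e.g. {1, 2, 3} as '(1,2,3)' for direct use in an IN clause;
    # this is only safe because the ids are db-generated integers, never
    # user-supplied strings
    return '({})'.format(','.join(str(i) for i in ids))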
    def GetFileHashes(self, given_hashes, given_hash_type,
                      desired_hash_type) -> typing.Collection[bytes]:

        if given_hash_type == 'sha256':

            # sha256 is the master hash type, so ids resolve directly
            hash_ids = self.GetHashIds(given_hashes)

        else:

            # other hash types (md5, sha1, ...) are looked up in local_hashes
            hash_ids = []

            for given_hash in given_hashes:

                if given_hash is None:

                    continue

                result = self._Execute(
                    'SELECT hash_id FROM local_hashes WHERE {} = ?;'.format(
                        given_hash_type),
                    (sqlite3.Binary(given_hash), )).fetchone()

                if result is not None:

                    (hash_id, ) = result

                    hash_ids.append(hash_id)

        if desired_hash_type == 'sha256':

            desired_hashes = self.GetHashes(hash_ids)

        else:

            # ids missing from local_hashes are silently dropped, so the
            # result may be shorter than the input and in arbitrary order
            desired_hashes = [
                desired_hash for (desired_hash, ) in self._Execute(
                    'SELECT {} FROM local_hashes WHERE hash_id IN {};'.format(
                        desired_hash_type,
                        HydrusData.SplayListForDB(hash_ids)))
            ]

        return desired_hashes
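A hypothetical caller, assuming an instance named modules_hashes exposes this method, translating external md5 digests into the client's canonical sha256 hashes:

# md5 of the empty file, as a bytes digest
md5s = [bytes.fromhex('d41d8cd98f00b204e9800998ecf8427e')]

sha256s = modules_hashes.GetFileHashes(md5s, 'md5', 'sha256')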
    def GetCurrentFilesCount(self, service_id, only_viewable=False):

        current_files_table_name = GenerateFilesTableName(
            service_id, HC.CONTENT_STATUS_CURRENT)

        if only_viewable:

            # join to files_info so the count can be filtered by mime
            result = self._c.execute(
                'SELECT COUNT( * ) FROM {} CROSS JOIN files_info USING ( hash_id ) WHERE mime IN {};'
                .format(current_files_table_name,
                        HydrusData.SplayListForDB(
                            HC.SEARCHABLE_MIMES))).fetchone()

        else:

            result = self._c.execute('SELECT COUNT( * ) FROM {};'.format(
                current_files_table_name)).fetchone()

        (count, ) = result

        return count
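The splayed IN list above works because HC.SEARCHABLE_MIMES holds integer enums. An equivalent sketch using bound placeholders instead, assuming a plain sqlite3 cursor and the same table layout; the names files_info, hash_id and mime are taken from the query above, the rest is illustrative:

import sqlite3

def count_viewable_files(c: sqlite3.Cursor, files_table_name: str, mimes) -> int:

    # one '?' placeholder per mime keeps the values out of the SQL text
    placeholders = ','.join('?' * len(mimes))

    query = ('SELECT COUNT( * ) FROM {} CROSS JOIN files_info USING ( hash_id ) '
             'WHERE mime IN ({});').format(files_table_name, placeholders)

    (count, ) = c.execute(query, tuple(mimes)).fetchone()

    return count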
    def GetRepositoryUpdateHashesICanProcess(self, service_key: bytes,
                                             content_types_to_process):

        # it is important that we use lists and sort by update index!
        # otherwise add/delete actions can occur in the wrong order

        service_id = self.modules_services.GetServiceId(service_key)

        (repository_updates_table_name,
         repository_unregistered_updates_table_name,
         repository_updates_processed_table_name
         ) = GenerateRepositoryUpdatesTableNames(service_id)

        result = self._Execute(
            'SELECT 1 FROM {} WHERE content_type = ? AND processed = ?;'.
            format(repository_updates_processed_table_name),
            (HC.CONTENT_TYPE_DEFINITIONS, True)).fetchone()

        this_is_first_definitions_work = result is None

        result = self._Execute(
            'SELECT 1 FROM {} WHERE content_type != ? AND processed = ?;'.
            format(repository_updates_processed_table_name),
            (HC.CONTENT_TYPE_DEFINITIONS, True)).fetchone()

        this_is_first_content_work = result is None

        min_unregistered_update_index = None

        result = self._Execute(
            'SELECT MIN( update_index ) FROM {} CROSS JOIN {} USING ( hash_id );'
            .format(repository_unregistered_updates_table_name,
                    repository_updates_table_name)).fetchone()

        if result is not None:

            # MIN over an empty join yields a ( None, ) row, so unpacking
            # leaves the index as None and the extra predicate below is skipped
            (min_unregistered_update_index, ) = result

        predicate_phrase = 'processed = ? AND content_type IN {}'.format(
            HydrusData.SplayListForDB(content_types_to_process))

        if min_unregistered_update_index is not None:

            # can't process an update if any of its files are as yet unregistered (these are both unprocessed and unavailable)
            # also, we mustn't skip any update indices, so if there is an invalid one, we won't do any after that!

            predicate_phrase = '{} AND update_index < {}'.format(
                predicate_phrase, min_unregistered_update_index)

        query = 'SELECT update_index, hash_id, content_type FROM {} CROSS JOIN {} USING ( hash_id ) WHERE {};'.format(
            repository_updates_processed_table_name,
            repository_updates_table_name, predicate_phrase)

        rows = self._Execute(query, (False, )).fetchall()

        update_indices_to_unprocessed_hash_ids = HydrusData.BuildKeyToSetDict(
            ((update_index, hash_id)
             for (update_index, hash_id, content_type) in rows))
        hash_ids_to_content_types_to_process = HydrusData.BuildKeyToSetDict(
            ((hash_id, content_type)
             for (update_index, hash_id, content_type) in rows))

        all_hash_ids = set(hash_ids_to_content_types_to_process.keys())

        all_local_hash_ids = self.modules_files_storage.FilterHashIdsToStatus(
            self.modules_services.local_update_service_id, all_hash_ids,
            HC.CONTENT_STATUS_CURRENT)

        for sorted_update_index in sorted(
                update_indices_to_unprocessed_hash_ids.keys()):

            unprocessed_hash_ids = update_indices_to_unprocessed_hash_ids[
                sorted_update_index]

            if not unprocessed_hash_ids.issubset(all_local_hash_ids):

                # can't process an update if any of its unprocessed files are not local
                # normally they'll always be available if registered, but just in case a user deletes one manually etc...
                # also, we mustn't skip any update indices, so if there is an invalid one, we won't do any after that!

                update_indices_to_unprocessed_hash_ids = {
                    update_index: unprocessed_hash_ids
                    for (update_index, unprocessed_hash_ids
                         ) in update_indices_to_unprocessed_hash_ids.items()
                    if update_index < sorted_update_index
                }

                break

        # all the hashes are now good to go

        all_hash_ids = set(
            itertools.chain.from_iterable(
                update_indices_to_unprocessed_hash_ids.values()))

        hash_ids_to_hashes = self.modules_hashes_local_cache.GetHashIdsToHashes(
            hash_ids=all_hash_ids)

        definition_hashes_and_content_types = []
        content_hashes_and_content_types = []

        if len(update_indices_to_unprocessed_hash_ids) > 0:

            for update_index in sorted(
                    update_indices_to_unprocessed_hash_ids.keys()):

                unprocessed_hash_ids = update_indices_to_unprocessed_hash_ids[
                    update_index]

                definition_hash_ids = {
                    hash_id
                    for hash_id in unprocessed_hash_ids
                    if HC.CONTENT_TYPE_DEFINITIONS in
                    hash_ids_to_content_types_to_process[hash_id]
                }
                content_hash_ids = {
                    hash_id
                    for hash_id in unprocessed_hash_ids
                    if hash_id not in definition_hash_ids
                }

                for (hash_ids, hashes_and_content_types) in [
                    (definition_hash_ids, definition_hashes_and_content_types),
                    (content_hash_ids, content_hashes_and_content_types)
                ]:

                    hashes_and_content_types.extend(
                        ((hash_ids_to_hashes[hash_id],
                          hash_ids_to_content_types_to_process[hash_id])
                         for hash_id in hash_ids))

        return (this_is_first_definitions_work,
                definition_hashes_and_content_types,
                this_is_first_content_work, content_hashes_and_content_types)
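The grouping into update_indices_to_unprocessed_hash_ids and hash_ids_to_content_types_to_process leans on HydrusData.BuildKeyToSetDict, which is not shown in this excerpt. A minimal sketch of the presumed behaviour, collecting ( key, value ) pairs into a dict of sets:

import collections

def BuildKeyToSetDict(pairs):

    # group an iterable of ( key, value ) pairs into { key : set( values ) }
    key_to_set = collections.defaultdict(set)

    for (key, value) in pairs:

        key_to_set[key].add(value)

    return key_to_set

With this, every update index maps to the set of its unprocessed hash_ids, and each hash_id maps to the content types it still needs processed, which is exactly what the sorted loop above consumes.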